]> git.saurik.com Git - redis.git/blob - redis.c
302a2b419f75b3f6a8ee89780023fb3a8e92480b
[redis.git] / redis.c
1 /*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #define REDIS_VERSION "1.07"
31
32 #include "fmacros.h"
33 #include "config.h"
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <time.h>
39 #include <unistd.h>
40 #define __USE_POSIX199309
41 #include <signal.h>
42
43 #ifdef HAVE_BACKTRACE
44 #include <execinfo.h>
45 #include <ucontext.h>
46 #endif /* HAVE_BACKTRACE */
47
48 #include <sys/wait.h>
49 #include <errno.h>
50 #include <assert.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <inttypes.h>
54 #include <arpa/inet.h>
55 #include <sys/stat.h>
56 #include <fcntl.h>
57 #include <sys/time.h>
58 #include <sys/resource.h>
59 #include <sys/uio.h>
60 #include <limits.h>
61 #include <math.h>
62
63 #if defined(__sun)
64 #include "solarisfixes.h"
65 #endif
66
67 #include "redis.h"
68 #include "ae.h" /* Event driven programming library */
69 #include "sds.h" /* Dynamic safe strings */
70 #include "anet.h" /* Networking the easy way */
71 #include "dict.h" /* Hash tables */
72 #include "adlist.h" /* Linked lists */
73 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
74 #include "lzf.h" /* LZF compression library */
75 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
76
77 /* Error codes */
78 #define REDIS_OK 0
79 #define REDIS_ERR -1
80
81 /* Static server configuration */
82 #define REDIS_SERVERPORT 6379 /* TCP port */
83 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
84 #define REDIS_IOBUF_LEN 1024
85 #define REDIS_LOADBUF_LEN 1024
86 #define REDIS_STATIC_ARGS 4
87 #define REDIS_DEFAULT_DBNUM 16
88 #define REDIS_CONFIGLINE_MAX 1024
89 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
90 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
91 #define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
92 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
93 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
94
95 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
96 #define REDIS_WRITEV_THRESHOLD 3
97 /* Max number of iovecs used for each writev call */
98 #define REDIS_WRITEV_IOVEC_COUNT 256
99
100 /* Hash table parameters */
101 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
102
103 /* Command flags */
104 #define REDIS_CMD_BULK 1 /* Bulk write command */
105 #define REDIS_CMD_INLINE 2 /* Inline command */
106 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
107 this flag will return an error when the 'maxmemory' option is set in the
108 config file and the server is using more than maxmemory bytes of memory.
109 In short these commands are denied on low memory conditions. */
110 #define REDIS_CMD_DENYOOM 4
111
112 /* Object types */
113 #define REDIS_STRING 0
114 #define REDIS_LIST 1
115 #define REDIS_SET 2
116 #define REDIS_ZSET 3
117 #define REDIS_HASH 4
118
119 /* Objects encoding */
120 #define REDIS_ENCODING_RAW 0 /* Raw representation */
121 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
122
123 /* Object types only used for dumping to disk */
124 #define REDIS_EXPIRETIME 253
125 #define REDIS_SELECTDB 254
126 #define REDIS_EOF 255
127
128 /* Defines related to the dump file format. To store 32 bits lengths for short
129 * keys requires a lot of space, so we check the most significant 2 bits of
130 * the first byte to interpret the length:
131 *
132 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
133 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
134 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
135 * 11|000000 this means: specially encoded object will follow. The six bits
136 * number specify the kind of object that follows.
137 * See the REDIS_RDB_ENC_* defines.
138 *
139 * Lengths up to 63 are stored using a single byte, most DB keys, and many
140 * values, will fit inside. */
141 #define REDIS_RDB_6BITLEN 0
142 #define REDIS_RDB_14BITLEN 1
143 #define REDIS_RDB_32BITLEN 2
144 #define REDIS_RDB_ENCVAL 3
145 #define REDIS_RDB_LENERR UINT_MAX
146
147 /* When a length of a string object stored on disk has the first two bits
148 * set, the remaining six bits specify a special encoding for the object
149 * accordingly to the following defines: */
150 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
151 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
152 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
153 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
154
155 /* Client flags */
156 #define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
157 #define REDIS_SLAVE 2 /* This client is a slave server */
158 #define REDIS_MASTER 4 /* This client is a master server */
159 #define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
160
161 /* Slave replication state - slave side */
162 #define REDIS_REPL_NONE 0 /* No active replication */
163 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
164 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
165
166 /* Slave replication state - from the point of view of master
167 * Note that in SEND_BULK and ONLINE state the slave receives new updates
168 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
169 * to start the next background saving in order to send updates to it. */
170 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
171 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
172 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
173 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
174
175 /* List related stuff */
176 #define REDIS_HEAD 0
177 #define REDIS_TAIL 1
178
179 /* Sort operations */
180 #define REDIS_SORT_GET 0
181 #define REDIS_SORT_ASC 1
182 #define REDIS_SORT_DESC 2
183 #define REDIS_SORTKEY_MAX 1024
184
185 /* Log levels */
186 #define REDIS_DEBUG 0
187 #define REDIS_NOTICE 1
188 #define REDIS_WARNING 2
189
190 /* Anti-warning macro... */
191 #define REDIS_NOTUSED(V) ((void) V)
192
193 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
194 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
195
196 /* Append only defines */
197 #define APPENDFSYNC_NO 0
198 #define APPENDFSYNC_ALWAYS 1
199 #define APPENDFSYNC_EVERYSEC 2
200
201 /*================================= Data types ============================== */
202
203 /* A redis object: a generic, reference counted value able to hold a
 * string / list / set. Every value shares this header; 'type' and
 * 'encoding' tell how 'ptr' has to be interpreted. */
204 typedef struct redisObject {
205 void *ptr; /* payload, read as an sds string when encoding is REDIS_ENCODING_RAW */
206 unsigned char type; /* presumably one of REDIS_STRING ... REDIS_HASH -- TODO confirm */
207 unsigned char encoding; /* REDIS_ENCODING_RAW or REDIS_ENCODING_INT */
208 unsigned char notused[2]; /* spare bytes, not referenced in this part of the file */
209 int refcount; /* reference counter, see incrRefCount() / decrRefCount() */
210 } robj;
211
/* A single database; server.db is an array of server.dbnum of these. */
212 typedef struct redisDb {
213 dict *dict; /* its size is what serverCron logs as the number of keys of the DB */
214 dict *expires; /* its size is what serverCron logs as the number of volatile keys */
215 int id; /* presumably the index of this DB -- TODO confirm */
216 } redisDb;
217
218 /* With multiplexing we need to take per-client state.
219 * Clients are taken in a linked list. */
220 typedef struct redisClient {
221 int fd;
222 redisDb *db;
223 int dictid;
224 sds querybuf;
225 robj **argv, **mbargv;
226 int argc, mbargc;
227 int bulklen; /* bulk read len. -1 if not in bulk read mode */
228 int multibulk; /* multi bulk command format active */
229 list *reply;
230 int sentlen;
231 time_t lastinteraction; /* time of the last interaction, used for timeout */
232 int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MASTER | REDIS_MONITOR */
233 int slaveseldb; /* slave selected db, if this client is a slave */
234 int authenticated; /* when requirepass is non-NULL */
235 int replstate; /* replication state if this is a slave */
236 int repldbfd; /* replication DB file descriptor */
237 long repldboff; /* replication DB file offset */
238 off_t repldbsize; /* replication DB file size */
239 } redisClient;
240
/* A save point: serverCron starts a background save when server.dirty is
 * at least 'changes' and more than 'seconds' seconds elapsed since
 * server.lastsave. */
241 struct saveparam {
242 time_t seconds;
243 int changes;
244 };
245
246 /* Global server state structure */
247 struct redisServer {
248 int port;
249 int fd;
250 redisDb *db; /* array of 'dbnum' databases */
251 dict *sharingpool;
252 unsigned int sharingpoolsize;
253 long long dirty; /* changes to DB from the last save */
254 list *clients; /* connected clients, walked by closeTimedoutClients() */
255 list *slaves, *monitors;
256 char neterr[ANET_ERR_LEN];
257 aeEventLoop *el;
258 int cronloops; /* number of times the cron function ran */
259 list *objfreelist; /* A list of freed objects to avoid malloc() */
260 time_t lastsave; /* Unix time of the last successful save */
261 size_t usedmemory; /* Used memory in bytes, refreshed by serverCron */
262 /* Fields used only for stats */
263 time_t stat_starttime; /* server start time */
264 long long stat_numcommands; /* number of processed commands */
265 long long stat_numconnections; /* number of connections received */
266 /* Configuration */
267 int verbosity; /* minimum level that redisLog() writes */
268 int glueoutputbuf;
269 int maxidletime; /* idle seconds before a client is closed, 0 = never */
270 int dbnum; /* number of elements of 'db' */
271 int daemonize;
272 int appendonly;
273 int appendfsync; /* presumably one of the APPENDFSYNC_* defines -- TODO confirm */
274 time_t lastfsync;
275 int appendfd;
276 int appendseldb;
277 char *pidfile;
278 int bgsaveinprogress; /* non-zero while serverCron has to wait for a saving child */
279 pid_t bgsavechildpid; /* reset to -1 by serverCron when the child terminates */
280 struct saveparam *saveparams; /* save points checked by serverCron */
281 int saveparamslen; /* number of elements of 'saveparams' */
282 char *logfile; /* log file path, NULL to log on stdout */
283 char *bindaddr;
284 char *dbfilename; /* file name passed to rdbSaveBackground() */
285 char *appendfilename;
286 char *requirepass;
287 int shareobjects;
288 /* Replication related */
289 int isslave;
290 char *masterauth;
291 char *masterhost;
292 int masterport;
293 redisClient *master; /* client that is master for this slave */
294 int replstate;
295 unsigned int maxclients;
296 unsigned long maxmemory;
297 /* Sort parameters - qsort_r() is only available under BSD so we
298 * have to take this state global, in order to pass it to sortCompare() */
299 int sort_desc;
300 int sort_alpha;
301 int sort_bypattern;
302 };
303
/* Signature shared by all the functions implementing a command. */
304 typedef void redisCommandProc(redisClient *c);
/* An entry of the command table (see cmdTable). */
305 struct redisCommand {
306 char *name; /* command name as written in cmdTable */
307 redisCommandProc *proc; /* function implementing the command */
308 int arity; /* NOTE(review): negative values presumably mean "at least -arity arguments" -- confirm */
309 int flags; /* REDIS_CMD_* flags ORed together */
310 };
311
/* A function name paired with an address stored as an integer.
 * NOTE(review): presumably used to map addresses back to names when
 * printing a stack trace -- confirm against its users. */
312 struct redisFunctionSym {
313 char *name;
314 unsigned long pointer;
315 };
316
/* An object paired with the value it is ordered by: either a numeric score
 * or another object to compare, sharing the same storage.
 * NOTE(review): presumably the element type of the vector sorted by SORT,
 * with the union member selected by server.sort_alpha / sort_bypattern --
 * confirm against sortCommand(). */
317 typedef struct _redisSortObject {
318 robj *obj;
319 union {
320 double score;
321 robj *cmpobj;
322 } u;
323 } redisSortObject;
324
/* NOTE(review): presumably one operation requested to SORT; REDIS_SORT_GET
 * looks like the matching value for 'type' -- confirm against sortCommand(). */
325 typedef struct _redisSortOperation {
326 int type;
327 robj *pattern;
328 } redisSortOperation;
329
330 /* ZSETs use a specialized version of Skiplists */
331
/* A node of the skiplist used by sorted sets. */
332 typedef struct zskiplistNode {
333 struct zskiplistNode **forward; /* array of next-node pointers; presumably one per level -- TODO confirm */
334 struct zskiplistNode *backward; /* single pointer to another node; presumably the previous one -- TODO confirm */
335 double score;
336 robj *obj;
337 } zskiplistNode;
338
/* The skiplist itself, created by zslCreate() and released by zslFree(). */
339 typedef struct zskiplist {
340 struct zskiplistNode *header, *tail;
341 unsigned long length; /* presumably the number of nodes -- TODO confirm */
342 int level; /* presumably the highest level in use, bounded by ZSKIPLIST_MAXLEVEL -- TODO confirm */
343 } zskiplist;
344
/* A sorted set value: a hash table plus a skiplist.
 * NOTE(review): presumably the dict maps members to scores while the
 * skiplist keeps them ordered by score -- confirm against zaddCommand(). */
345 typedef struct zset {
346 dict *dict;
347 zskiplist *zsl;
348 } zset;
349
350 /* Our shared "common" objects */
351
/* Objects used over and over (protocol fragments, common replies and
 * errors) collected in the single global 'shared'.
 * NOTE(review): presumably they are created once at startup -- confirm. */
352 struct sharedObjectsStruct {
353 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
354 *colon, *nullbulk, *nullmultibulk,
355 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
356 *outofrangeerr, *plus,
357 *select0, *select1, *select2, *select3, *select4,
358 *select5, *select6, *select7, *select8, *select9;
359 } shared;
360
361 /* Global vars that are actually used as constants. The following double
362 * values are used for double on-disk serialization, and are initialized
363 * at runtime to avoid strange compiler optimizations. */
364
365 static double R_Zero, R_PosInf, R_NegInf, R_Nan;
366
367 /*================================ Prototypes =============================== */
368
369 static void freeStringObject(robj *o);
370 static void freeListObject(robj *o);
371 static void freeSetObject(robj *o);
372 static void decrRefCount(void *o);
373 static robj *createObject(int type, void *ptr);
374 static void freeClient(redisClient *c);
375 static int rdbLoad(char *filename);
376 static void addReply(redisClient *c, robj *obj);
377 static void addReplySds(redisClient *c, sds s);
378 static void incrRefCount(robj *o);
379 static int rdbSaveBackground(char *filename);
380 static robj *createStringObject(char *ptr, size_t len);
381 static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
382 static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
383 static int syncWithMaster(void);
384 static robj *tryObjectSharing(robj *o);
385 static int tryObjectEncoding(robj *o);
386 static robj *getDecodedObject(const robj *o);
387 static int removeExpire(redisDb *db, robj *key);
388 static int expireIfNeeded(redisDb *db, robj *key);
389 static int deleteIfVolatile(redisDb *db, robj *key);
390 static int deleteKey(redisDb *db, robj *key);
391 static time_t getExpire(redisDb *db, robj *key);
392 static int setExpire(redisDb *db, robj *key, time_t when);
393 static void updateSlavesWaitingBgsave(int bgsaveerr);
394 static void freeMemoryIfNeeded(void);
395 static int processCommand(redisClient *c);
396 static void setupSigSegvAction(void);
397 static void rdbRemoveTempFile(pid_t childpid);
398 static size_t stringObjectLen(robj *o);
399 static void processInputBuffer(redisClient *c);
400 static zskiplist *zslCreate(void);
401 static void zslFree(zskiplist *zsl);
402 static void zslInsert(zskiplist *zsl, double score, robj *obj);
403 static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
404
405 static void authCommand(redisClient *c);
406 static void pingCommand(redisClient *c);
407 static void echoCommand(redisClient *c);
408 static void setCommand(redisClient *c);
409 static void setnxCommand(redisClient *c);
410 static void getCommand(redisClient *c);
411 static void delCommand(redisClient *c);
412 static void existsCommand(redisClient *c);
413 static void incrCommand(redisClient *c);
414 static void decrCommand(redisClient *c);
415 static void incrbyCommand(redisClient *c);
416 static void decrbyCommand(redisClient *c);
417 static void selectCommand(redisClient *c);
418 static void randomkeyCommand(redisClient *c);
419 static void keysCommand(redisClient *c);
420 static void dbsizeCommand(redisClient *c);
421 static void lastsaveCommand(redisClient *c);
422 static void saveCommand(redisClient *c);
423 static void bgsaveCommand(redisClient *c);
424 static void shutdownCommand(redisClient *c);
425 static void moveCommand(redisClient *c);
426 static void renameCommand(redisClient *c);
427 static void renamenxCommand(redisClient *c);
428 static void lpushCommand(redisClient *c);
429 static void rpushCommand(redisClient *c);
430 static void lpopCommand(redisClient *c);
431 static void rpopCommand(redisClient *c);
432 static void llenCommand(redisClient *c);
433 static void lindexCommand(redisClient *c);
434 static void lrangeCommand(redisClient *c);
435 static void ltrimCommand(redisClient *c);
436 static void typeCommand(redisClient *c);
437 static void lsetCommand(redisClient *c);
438 static void saddCommand(redisClient *c);
439 static void sremCommand(redisClient *c);
440 static void smoveCommand(redisClient *c);
441 static void sismemberCommand(redisClient *c);
442 static void scardCommand(redisClient *c);
443 static void spopCommand(redisClient *c);
444 static void srandmemberCommand(redisClient *c);
445 static void sinterCommand(redisClient *c);
446 static void sinterstoreCommand(redisClient *c);
447 static void sunionCommand(redisClient *c);
448 static void sunionstoreCommand(redisClient *c);
449 static void sdiffCommand(redisClient *c);
450 static void sdiffstoreCommand(redisClient *c);
451 static void syncCommand(redisClient *c);
452 static void flushdbCommand(redisClient *c);
453 static void flushallCommand(redisClient *c);
454 static void sortCommand(redisClient *c);
455 static void lremCommand(redisClient *c);
456 static void rpoplpushcommand(redisClient *c);
457 static void infoCommand(redisClient *c);
458 static void mgetCommand(redisClient *c);
459 static void monitorCommand(redisClient *c);
460 static void expireCommand(redisClient *c);
461 static void expireatCommand(redisClient *c);
462 static void getsetCommand(redisClient *c);
463 static void ttlCommand(redisClient *c);
464 static void slaveofCommand(redisClient *c);
465 static void debugCommand(redisClient *c);
466 static void msetCommand(redisClient *c);
467 static void msetnxCommand(redisClient *c);
468 static void zaddCommand(redisClient *c);
469 static void zincrbyCommand(redisClient *c);
470 static void zrangeCommand(redisClient *c);
471 static void zrangebyscoreCommand(redisClient *c);
472 static void zrevrangeCommand(redisClient *c);
473 static void zcardCommand(redisClient *c);
474 static void zremCommand(redisClient *c);
475 static void zscoreCommand(redisClient *c);
476 static void zremrangebyscoreCommand(redisClient *c);
477
478 /*================================= Globals ================================= */
479
480 /* Global vars */
481 static struct redisServer server; /* server global state */
/* The command table. Every entry is {name, implementation, arity, flags}
 * (see struct redisCommand) and the table is terminated by an entry with a
 * NULL name.
 * NOTE(review): a negative arity presumably means "at least -arity
 * arguments, command name included" -- confirm against the code that looks
 * commands up. */
482 static struct redisCommand cmdTable[] = {
483 {"get",getCommand,2,REDIS_CMD_INLINE},
484 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
485 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
486 {"del",delCommand,-2,REDIS_CMD_INLINE},
487 {"exists",existsCommand,2,REDIS_CMD_INLINE},
488 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
489 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
490 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
491 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
492 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
493 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
494 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
495 {"llen",llenCommand,2,REDIS_CMD_INLINE},
496 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
497 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
498 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
499 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
500 {"lrem",lremCommand,4,REDIS_CMD_BULK},
501 {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_BULK},
502 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
503 {"srem",sremCommand,3,REDIS_CMD_BULK},
504 {"smove",smoveCommand,4,REDIS_CMD_BULK},
505 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
506 {"scard",scardCommand,2,REDIS_CMD_INLINE},
507 {"spop",spopCommand,2,REDIS_CMD_INLINE},
508 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
509 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
510 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
511 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
512 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
513 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
514 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
515 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
516 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
517 {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
518 {"zrem",zremCommand,3,REDIS_CMD_BULK},
519 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
520 {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
521 {"zrangebyscore",zrangebyscoreCommand,4,REDIS_CMD_INLINE},
522 {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
523 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
524 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
525 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
526 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
527 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
528 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
529 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
530 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
531 {"select",selectCommand,2,REDIS_CMD_INLINE},
532 {"move",moveCommand,3,REDIS_CMD_INLINE},
533 {"rename",renameCommand,3,REDIS_CMD_INLINE},
534 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
535 {"expire",expireCommand,3,REDIS_CMD_INLINE},
536 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
537 {"keys",keysCommand,2,REDIS_CMD_INLINE},
538 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
539 {"auth",authCommand,2,REDIS_CMD_INLINE},
540 {"ping",pingCommand,1,REDIS_CMD_INLINE},
541 {"echo",echoCommand,2,REDIS_CMD_BULK},
542 {"save",saveCommand,1,REDIS_CMD_INLINE},
543 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
544 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
545 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
546 {"type",typeCommand,2,REDIS_CMD_INLINE},
547 {"sync",syncCommand,1,REDIS_CMD_INLINE},
548 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
549 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
550 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
551 {"info",infoCommand,1,REDIS_CMD_INLINE},
552 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
553 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
554 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
555 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
556 {NULL,NULL,0,0}
557 };
558
559 /*============================ Utility functions ============================ */
560
/* Glob-style pattern matching.
 *
 * Matches the first 'stringLen' bytes of 'string' against the first
 * 'patternLen' bytes of 'pattern'. Supported syntax:
 *
 *   *        any sequence of bytes, the empty one included
 *   ?        exactly one byte
 *   [abc]    one byte among the listed ones
 *   [^abc]   one byte NOT among the listed ones
 *   [a-z]    one byte inside the range (reversed bounds are swapped)
 *   \x       the byte 'x' taken literally
 *
 * A '[' class that is never closed is handled as if it was closed at the
 * end of the pattern, and a backslash with nothing after it is a literal
 * backslash. When 'nocase' is non-zero bytes are compared after tolower(),
 * with the exception of escaped bytes inside a class that are always
 * compared verbatim.
 *
 * Neither buffer has to be NUL terminated: bytes at or past the given
 * lengths are never read.
 *
 * Returns 1 if the string matches, 0 otherwise. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while(patternLen) {
        switch(pattern[0]) {
        case '*':
            /* Collapse a run of '*' without looking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try to match the rest of the pattern at every position. */
            while(stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int negate, match;

            /* A class always consumes one byte: with nothing left to
             * consume it can't match, not even when negated. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            negate = patternLen && pattern[0] == '^';
            if (negate) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back one byte so that the
                     * increment after the switch lands exactly on the end
                     * of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        /* tolower() is only defined for values that fit
                         * an unsigned char (or EOF). */
                        start = tolower((unsigned char)start);
                        end = tolower((unsigned char)end);
                        c = tolower((unsigned char)c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (negate)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is over: only trailing '*' can still match. */
            while(patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
683
684 static void redisLog(int level, const char *fmt, ...) {
685 va_list ap;
686 FILE *fp;
687
688 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
689 if (!fp) return;
690
691 va_start(ap, fmt);
692 if (level >= server.verbosity) {
693 char *c = ".-*";
694 char buf[64];
695 time_t now;
696
697 now = time(NULL);
698 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
699 fprintf(fp,"%s %c ",buf,c[level]);
700 vfprintf(fp, fmt, ap);
701 fprintf(fp,"\n");
702 fflush(fp);
703 }
704 va_end(ap);
705
706 if (server.logfile) fclose(fp);
707 }
708
709 /*====================== Hash table type implementation ==================== */
710
711 /* This is a hash table type that uses the SDS dynamic strings library as
712 * keys and redis objects as values (objects can hold SDS strings,
713 * lists, sets). */
714
/* Dict destructor for plain heap blocks: hands 'val' to zfree().
 * 'privdata' is not used. */
static void dictVanillaFree(void *privdata, void *val)
{
    zfree(val);
    DICT_NOTUSED(privdata);
}
720
721 static int sdsDictKeyCompare(void *privdata, const void *key1,
722 const void *key2)
723 {
724 int l1,l2;
725 DICT_NOTUSED(privdata);
726
727 l1 = sdslen((sds)key1);
728 l2 = sdslen((sds)key2);
729 if (l1 != l2) return 0;
730 return memcmp(key1, key2, l1) == 0;
731 }
732
/* Dict destructor for keys or values that are redis objects: the reference
 * the dict was holding is dropped with decrRefCount(). 'privdata' is not
 * used. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    decrRefCount(val);
    DICT_NOTUSED(privdata);
}
739
740 static int dictObjKeyCompare(void *privdata, const void *key1,
741 const void *key2)
742 {
743 const robj *o1 = key1, *o2 = key2;
744 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
745 }
746
747 static unsigned int dictObjHash(const void *key) {
748 const robj *o = key;
749 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
750 }
751
752 static int dictEncObjKeyCompare(void *privdata, const void *key1,
753 const void *key2)
754 {
755 const robj *o1 = key1, *o2 = key2;
756
757 if (o1->encoding == REDIS_ENCODING_RAW &&
758 o2->encoding == REDIS_ENCODING_RAW)
759 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
760 else {
761 robj *dec1, *dec2;
762 int cmp;
763
764 dec1 = o1->encoding != REDIS_ENCODING_RAW ?
765 getDecodedObject(o1) : (robj*)o1;
766 dec2 = o2->encoding != REDIS_ENCODING_RAW ?
767 getDecodedObject(o2) : (robj*)o2;
768 cmp = sdsDictKeyCompare(privdata,dec1->ptr,dec2->ptr);
769 if (dec1 != o1) decrRefCount(dec1);
770 if (dec2 != o2) decrRefCount(dec2);
771 return cmp;
772 }
773 }
774
775 static unsigned int dictEncObjHash(const void *key) {
776 const robj *o = key;
777
778 if (o->encoding == REDIS_ENCODING_RAW)
779 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
780 else {
781 robj *dec = getDecodedObject(o);
782 unsigned int hash = dictGenHashFunction(dec->ptr, sdslen((sds)dec->ptr));
783 decrRefCount(dec);
784 return hash;
785 }
786 }
787
/* Dict type with redis objects as keys and no values. Keys are hashed and
 * compared by their string form whatever their encoding is, and released
 * with decrRefCount() when removed.
 * NOTE(review): presumably the type of the dicts holding REDIS_SET values
 * -- confirm against the code creating sets. */
788 static dictType setDictType = {
789 dictEncObjHash, /* hash function */
790 NULL, /* key dup */
791 NULL, /* val dup */
792 dictEncObjKeyCompare, /* key compare */
793 dictRedisObjectDestructor, /* key destructor */
794 NULL /* val destructor */
795 };
796
/* Same key handling as setDictType, but every value is a heap block that is
 * released with zfree() (through dictVanillaFree) when the entry is removed.
 * NOTE(review): presumably the value is the score of the member -- confirm
 * against zaddCommand(). */
797 static dictType zsetDictType = {
798 dictEncObjHash, /* hash function */
799 NULL, /* key dup */
800 NULL, /* val dup */
801 dictEncObjKeyCompare, /* key compare */
802 dictRedisObjectDestructor, /* key destructor */
803 dictVanillaFree /* val destructor */
804 };
805
/* Dict type with redis objects both as keys and as values, both released
 * with decrRefCount(). Keys are hashed and compared reading 'ptr' as an sds
 * string without any decoding, so they have to be RAW encoded strings. */
806 static dictType hashDictType = {
807 dictObjHash, /* hash function */
808 NULL, /* key dup */
809 NULL, /* val dup */
810 dictObjKeyCompare, /* key compare */
811 dictRedisObjectDestructor, /* key destructor */
812 dictRedisObjectDestructor /* val destructor */
813 };
814
815 /* ========================= Random utility functions ======================= */
816
/* Out of memory handler.
 *
 * Redis makes no attempt to recover when the allocation of an object or of
 * a string fails: even reporting the problem to the client may be
 * impossible, since the networking layer needs heap memory for its send
 * buffers too. So the failure is reported on stderr, prefixed by 'msg', and
 * after one second the process is aborted. This function never returns. */
static void oom(const char *msg) {
    FILE *out = stderr;

    fprintf(out, "%s: Out of memory\n",msg);
    fflush(out);
    sleep(1);
    abort();
}
828
829 /* ====================== Redis server networking stuff ===================== */
830 static void closeTimedoutClients(void) {
831 redisClient *c;
832 listNode *ln;
833 time_t now = time(NULL);
834
835 listRewind(server.clients);
836 while ((ln = listYield(server.clients)) != NULL) {
837 c = listNodeValue(ln);
838 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
839 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
840 (now - c->lastinteraction > server.maxidletime)) {
841 redisLog(REDIS_DEBUG,"Closing idle client");
842 freeClient(c);
843 }
844 }
845 }
846
847 static int htNeedsResize(dict *dict) {
848 long long size, used;
849
850 size = dictSlots(dict);
851 used = dictSize(dict);
852 return (size && used && size > DICT_HT_INITIAL_SIZE &&
853 (used*100/size < REDIS_HT_MINFILL));
854 }
855
856 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
857 * we resize the hash table to save memory */
858 static void tryResizeHashTables(void) {
859 int j;
860
861 for (j = 0; j < server.dbnum; j++) {
862 if (htNeedsResize(server.db[j].dict)) {
863 redisLog(REDIS_DEBUG,"The hash table %d is too sparse, resize it...",j);
864 dictResize(server.db[j].dict);
865 redisLog(REDIS_DEBUG,"Hash table %d resized.",j);
866 }
867 if (htNeedsResize(server.db[j].expires))
868 dictResize(server.db[j].expires);
869 }
870 }
871
872 static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
873 int j, loops = server.cronloops++;
874 REDIS_NOTUSED(eventLoop);
875 REDIS_NOTUSED(id);
876 REDIS_NOTUSED(clientData);
877
878 /* Update the global state with the amount of used memory */
879 server.usedmemory = zmalloc_used_memory();
880
881 /* Show some info about non-empty databases */
882 for (j = 0; j < server.dbnum; j++) {
883 long long size, used, vkeys;
884
885 size = dictSlots(server.db[j].dict);
886 used = dictSize(server.db[j].dict);
887 vkeys = dictSize(server.db[j].expires);
888 if (!(loops % 5) && (used || vkeys)) {
889 redisLog(REDIS_DEBUG,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
890 /* dictPrintStats(server.dict); */
891 }
892 }
893
894 /* We don't want to resize the hash tables while a background saving
895 * is in progress: the saving child is created using fork() that is
896 * implemented with a copy-on-write semantic in most modern systems, so
897 * if we resize the HT while there is the saving child at work actually
898 * a lot of memory movements in the parent will cause a lot of pages
899 * copied. */
900 if (!server.bgsaveinprogress) tryResizeHashTables();
901
902 /* Show information about connected clients */
903 if (!(loops % 5)) {
904 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
905 listLength(server.clients)-listLength(server.slaves),
906 listLength(server.slaves),
907 server.usedmemory,
908 dictSize(server.sharingpool));
909 }
910
911 /* Close connections of timedout clients */
912 if (server.maxidletime && !(loops % 10))
913 closeTimedoutClients();
914
915 /* Check if a background saving in progress terminated */
916 if (server.bgsaveinprogress) {
917 int statloc;
918 if (wait3(&statloc,WNOHANG,NULL)) {
919 int exitcode = WEXITSTATUS(statloc);
920 int bysignal = WIFSIGNALED(statloc);
921
922 if (!bysignal && exitcode == 0) {
923 redisLog(REDIS_NOTICE,
924 "Background saving terminated with success");
925 server.dirty = 0;
926 server.lastsave = time(NULL);
927 } else if (!bysignal && exitcode != 0) {
928 redisLog(REDIS_WARNING, "Background saving error");
929 } else {
930 redisLog(REDIS_WARNING,
931 "Background saving terminated by signal");
932 rdbRemoveTempFile(server.bgsavechildpid);
933 }
934 server.bgsaveinprogress = 0;
935 server.bgsavechildpid = -1;
936 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
937 }
938 } else {
939 /* If there is not a background saving in progress check if
940 * we have to save now */
941 time_t now = time(NULL);
942 for (j = 0; j < server.saveparamslen; j++) {
943 struct saveparam *sp = server.saveparams+j;
944
945 if (server.dirty >= sp->changes &&
946 now-server.lastsave > sp->seconds) {
947 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
948 sp->changes, sp->seconds);
949 rdbSaveBackground(server.dbfilename);
950 break;
951 }
952 }
953 }
954
955 /* Try to expire a few timed out keys. The algorithm used is adaptive and
956 * will use few CPU cycles if there are few expiring keys, otherwise
957 * it will get more aggressive to avoid that too much memory is used by
958 * keys that can be removed from the keyspace. */
959 for (j = 0; j < server.dbnum; j++) {
960 int expired;
961 redisDb *db = server.db+j;
962
963 /* Continue to expire if at the end of the cycle more than 25%
964 * of the keys were expired. */
965 do {
966 int num = dictSize(db->expires);
967 time_t now = time(NULL);
968
969 expired = 0;
970 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
971 num = REDIS_EXPIRELOOKUPS_PER_CRON;
972 while (num--) {
973 dictEntry *de;
974 time_t t;
975
976 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
977 t = (time_t) dictGetEntryVal(de);
978 if (now > t) {
979 deleteKey(db,dictGetEntryKey(de));
980 expired++;
981 }
982 }
983 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
984 }
985
986 /* Check if we should connect to a MASTER */
987 if (server.replstate == REDIS_REPL_CONNECT) {
988 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
989 if (syncWithMaster() == REDIS_OK) {
990 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
991 }
992 }
993 return 1000;
994 }
995
996 static void createSharedObjects(void) {
997 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
998 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
999 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
1000 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1001 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1002 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1003 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1004 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1005 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
1006 /* no such key */
1007 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1008 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1009 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1010 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1011 "-ERR no such key\r\n"));
1012 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1013 "-ERR syntax error\r\n"));
1014 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1015 "-ERR source and destination objects are the same\r\n"));
1016 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1017 "-ERR index out of range\r\n"));
1018 shared.space = createObject(REDIS_STRING,sdsnew(" "));
1019 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1020 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1021 shared.select0 = createStringObject("select 0\r\n",10);
1022 shared.select1 = createStringObject("select 1\r\n",10);
1023 shared.select2 = createStringObject("select 2\r\n",10);
1024 shared.select3 = createStringObject("select 3\r\n",10);
1025 shared.select4 = createStringObject("select 4\r\n",10);
1026 shared.select5 = createStringObject("select 5\r\n",10);
1027 shared.select6 = createStringObject("select 6\r\n",10);
1028 shared.select7 = createStringObject("select 7\r\n",10);
1029 shared.select8 = createStringObject("select 8\r\n",10);
1030 shared.select9 = createStringObject("select 9\r\n",10);
1031 }
1032
1033 static void appendServerSaveParams(time_t seconds, int changes) {
1034 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
1035 server.saveparams[server.saveparamslen].seconds = seconds;
1036 server.saveparams[server.saveparamslen].changes = changes;
1037 server.saveparamslen++;
1038 }
1039
1040 static void resetServerSaveParams() {
1041 zfree(server.saveparams);
1042 server.saveparams = NULL;
1043 server.saveparamslen = 0;
1044 }
1045
1046 static void initServerConfig() {
1047 server.dbnum = REDIS_DEFAULT_DBNUM;
1048 server.port = REDIS_SERVERPORT;
1049 server.verbosity = REDIS_DEBUG;
1050 server.maxidletime = REDIS_MAXIDLETIME;
1051 server.saveparams = NULL;
1052 server.logfile = NULL; /* NULL = log on standard output */
1053 server.bindaddr = NULL;
1054 server.glueoutputbuf = 1;
1055 server.daemonize = 0;
1056 server.appendonly = 0;
1057 server.appendfsync = APPENDFSYNC_ALWAYS;
1058 server.lastfsync = time(NULL);
1059 server.appendfd = -1;
1060 server.appendseldb = -1; /* Make sure the first time will not match */
1061 server.pidfile = "/var/run/redis.pid";
1062 server.dbfilename = "dump.rdb";
1063 server.appendfilename = "appendonly.log";
1064 server.requirepass = NULL;
1065 server.shareobjects = 0;
1066 server.sharingpoolsize = 1024;
1067 server.maxclients = 0;
1068 server.maxmemory = 0;
1069 resetServerSaveParams();
1070
1071 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1072 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1073 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1074 /* Replication related */
1075 server.isslave = 0;
1076 server.masterauth = NULL;
1077 server.masterhost = NULL;
1078 server.masterport = 6379;
1079 server.master = NULL;
1080 server.replstate = REDIS_REPL_NONE;
1081
1082 /* Double constants initialization */
1083 R_Zero = 0.0;
1084 R_PosInf = 1.0/R_Zero;
1085 R_NegInf = -1.0/R_Zero;
1086 R_Nan = R_Zero/R_Zero;
1087 }
1088
1089 static void initServer() {
1090 int j;
1091
1092 signal(SIGHUP, SIG_IGN);
1093 signal(SIGPIPE, SIG_IGN);
1094 setupSigSegvAction();
1095
1096 server.clients = listCreate();
1097 server.slaves = listCreate();
1098 server.monitors = listCreate();
1099 server.objfreelist = listCreate();
1100 createSharedObjects();
1101 server.el = aeCreateEventLoop();
1102 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1103 server.sharingpool = dictCreate(&setDictType,NULL);
1104 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1105 if (server.fd == -1) {
1106 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1107 exit(1);
1108 }
1109 for (j = 0; j < server.dbnum; j++) {
1110 server.db[j].dict = dictCreate(&hashDictType,NULL);
1111 server.db[j].expires = dictCreate(&setDictType,NULL);
1112 server.db[j].id = j;
1113 }
1114 server.cronloops = 0;
1115 server.bgsaveinprogress = 0;
1116 server.bgsavechildpid = -1;
1117 server.lastsave = time(NULL);
1118 server.dirty = 0;
1119 server.usedmemory = 0;
1120 server.stat_numcommands = 0;
1121 server.stat_numconnections = 0;
1122 server.stat_starttime = time(NULL);
1123 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1124
1125 if (server.appendonly) {
1126 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
1127 if (server.appendfd == -1) {
1128 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1129 strerror(errno));
1130 exit(1);
1131 }
1132 }
1133 }
1134
1135 /* Empty the whole database */
1136 static long long emptyDb() {
1137 int j;
1138 long long removed = 0;
1139
1140 for (j = 0; j < server.dbnum; j++) {
1141 removed += dictSize(server.db[j].dict);
1142 dictEmpty(server.db[j].dict);
1143 dictEmpty(server.db[j].expires);
1144 }
1145 return removed;
1146 }
1147
/* Convert a "yes"/"no" string (compared ignoring case) into an integer:
 * 1 for "yes", 0 for "no", -1 for anything else. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    return (strcasecmp(s,"no") == 0) ? 0 : -1;
}
1153
1154 /* I agree, this is a very rudimental way to load a configuration...
1155 will improve later if the config gets more complex */
1156 static void loadServerConfig(char *filename) {
1157 FILE *fp;
1158 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1159 int linenum = 0;
1160 sds line = NULL;
1161
1162 if (filename[0] == '-' && filename[1] == '\0')
1163 fp = stdin;
1164 else {
1165 if ((fp = fopen(filename,"r")) == NULL) {
1166 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1167 exit(1);
1168 }
1169 }
1170
1171 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1172 sds *argv;
1173 int argc, j;
1174
1175 linenum++;
1176 line = sdsnew(buf);
1177 line = sdstrim(line," \t\r\n");
1178
1179 /* Skip comments and blank lines*/
1180 if (line[0] == '#' || line[0] == '\0') {
1181 sdsfree(line);
1182 continue;
1183 }
1184
1185 /* Split into arguments */
1186 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1187 sdstolower(argv[0]);
1188
1189 /* Execute config directives */
1190 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
1191 server.maxidletime = atoi(argv[1]);
1192 if (server.maxidletime < 0) {
1193 err = "Invalid timeout value"; goto loaderr;
1194 }
1195 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
1196 server.port = atoi(argv[1]);
1197 if (server.port < 1 || server.port > 65535) {
1198 err = "Invalid port"; goto loaderr;
1199 }
1200 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
1201 server.bindaddr = zstrdup(argv[1]);
1202 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
1203 int seconds = atoi(argv[1]);
1204 int changes = atoi(argv[2]);
1205 if (seconds < 1 || changes < 0) {
1206 err = "Invalid save parameters"; goto loaderr;
1207 }
1208 appendServerSaveParams(seconds,changes);
1209 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
1210 if (chdir(argv[1]) == -1) {
1211 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1212 argv[1], strerror(errno));
1213 exit(1);
1214 }
1215 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1216 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
1217 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1218 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
1219 else {
1220 err = "Invalid log level. Must be one of debug, notice, warning";
1221 goto loaderr;
1222 }
1223 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
1224 FILE *logfp;
1225
1226 server.logfile = zstrdup(argv[1]);
1227 if (!strcasecmp(server.logfile,"stdout")) {
1228 zfree(server.logfile);
1229 server.logfile = NULL;
1230 }
1231 if (server.logfile) {
1232 /* Test if we are able to open the file. The server will not
1233 * be able to abort just for this problem later... */
1234 logfp = fopen(server.logfile,"a");
1235 if (logfp == NULL) {
1236 err = sdscatprintf(sdsempty(),
1237 "Can't open the log file: %s", strerror(errno));
1238 goto loaderr;
1239 }
1240 fclose(logfp);
1241 }
1242 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
1243 server.dbnum = atoi(argv[1]);
1244 if (server.dbnum < 1) {
1245 err = "Invalid number of databases"; goto loaderr;
1246 }
1247 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1248 server.maxclients = atoi(argv[1]);
1249 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
1250 server.maxmemory = strtoll(argv[1], NULL, 10);
1251 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
1252 server.masterhost = sdsnew(argv[1]);
1253 server.masterport = atoi(argv[2]);
1254 server.replstate = REDIS_REPL_CONNECT;
1255 } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
1256 server.masterauth = zstrdup(argv[1]);
1257 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
1258 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
1259 err = "argument must be 'yes' or 'no'"; goto loaderr;
1260 }
1261 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
1262 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
1263 err = "argument must be 'yes' or 'no'"; goto loaderr;
1264 }
1265 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1266 server.sharingpoolsize = atoi(argv[1]);
1267 if (server.sharingpoolsize < 1) {
1268 err = "invalid object sharing pool size"; goto loaderr;
1269 }
1270 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
1271 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
1272 err = "argument must be 'yes' or 'no'"; goto loaderr;
1273 }
1274 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1275 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1276 err = "argument must be 'yes' or 'no'"; goto loaderr;
1277 }
1278 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1279 if (!strcasecmp(argv[1],"no")) {
1280 server.appendfsync = APPENDFSYNC_NO;
1281 } else if (!strcasecmp(argv[1],"always")) {
1282 server.appendfsync = APPENDFSYNC_ALWAYS;
1283 } else if (!strcasecmp(argv[1],"everysec")) {
1284 server.appendfsync = APPENDFSYNC_EVERYSEC;
1285 } else {
1286 err = "argument must be 'no', 'always' or 'everysec'";
1287 goto loaderr;
1288 }
1289 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
1290 server.requirepass = zstrdup(argv[1]);
1291 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
1292 server.pidfile = zstrdup(argv[1]);
1293 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
1294 server.dbfilename = zstrdup(argv[1]);
1295 } else {
1296 err = "Bad directive or wrong number of arguments"; goto loaderr;
1297 }
1298 for (j = 0; j < argc; j++)
1299 sdsfree(argv[j]);
1300 zfree(argv);
1301 sdsfree(line);
1302 }
1303 if (fp != stdin) fclose(fp);
1304 return;
1305
1306 loaderr:
1307 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1308 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1309 fprintf(stderr, ">>> '%s'\n", line);
1310 fprintf(stderr, "%s\n", err);
1311 exit(1);
1312 }
1313
1314 static void freeClientArgv(redisClient *c) {
1315 int j;
1316
1317 for (j = 0; j < c->argc; j++)
1318 decrRefCount(c->argv[j]);
1319 for (j = 0; j < c->mbargc; j++)
1320 decrRefCount(c->mbargv[j]);
1321 c->argc = 0;
1322 c->mbargc = 0;
1323 }
1324
1325 static void freeClient(redisClient *c) {
1326 listNode *ln;
1327
1328 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1329 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1330 sdsfree(c->querybuf);
1331 listRelease(c->reply);
1332 freeClientArgv(c);
1333 close(c->fd);
1334 ln = listSearchKey(server.clients,c);
1335 assert(ln != NULL);
1336 listDelNode(server.clients,ln);
1337 if (c->flags & REDIS_SLAVE) {
1338 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1339 close(c->repldbfd);
1340 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1341 ln = listSearchKey(l,c);
1342 assert(ln != NULL);
1343 listDelNode(l,ln);
1344 }
1345 if (c->flags & REDIS_MASTER) {
1346 server.master = NULL;
1347 server.replstate = REDIS_REPL_CONNECT;
1348 }
1349 zfree(c->argv);
1350 zfree(c->mbargv);
1351 zfree(c);
1352 }
1353
1354 #define GLUEREPLY_UP_TO (1024)
1355 static void glueReplyBuffersIfNeeded(redisClient *c) {
1356 int copylen = 0;
1357 char buf[GLUEREPLY_UP_TO];
1358 listNode *ln;
1359 robj *o;
1360
1361 listRewind(c->reply);
1362 while((ln = listYield(c->reply))) {
1363 int objlen;
1364
1365 o = ln->value;
1366 objlen = sdslen(o->ptr);
1367 if (copylen + objlen <= GLUEREPLY_UP_TO) {
1368 memcpy(buf+copylen,o->ptr,objlen);
1369 copylen += objlen;
1370 listDelNode(c->reply,ln);
1371 } else {
1372 if (copylen == 0) return;
1373 break;
1374 }
1375 }
1376 /* Now the output buffer is empty, add the new single element */
1377 o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
1378 listAddNodeHead(c->reply,o);
1379 }
1380
1381 static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1382 redisClient *c = privdata;
1383 int nwritten = 0, totwritten = 0, objlen;
1384 robj *o;
1385 REDIS_NOTUSED(el);
1386 REDIS_NOTUSED(mask);
1387
1388 /* Use writev() if we have enough buffers to send */
1389 if (!server.glueoutputbuf &&
1390 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
1391 !(c->flags & REDIS_MASTER))
1392 {
1393 sendReplyToClientWritev(el, fd, privdata, mask);
1394 return;
1395 }
1396
1397 while(listLength(c->reply)) {
1398 if (server.glueoutputbuf && listLength(c->reply) > 1)
1399 glueReplyBuffersIfNeeded(c);
1400
1401 o = listNodeValue(listFirst(c->reply));
1402 objlen = sdslen(o->ptr);
1403
1404 if (objlen == 0) {
1405 listDelNode(c->reply,listFirst(c->reply));
1406 continue;
1407 }
1408
1409 if (c->flags & REDIS_MASTER) {
1410 /* Don't reply to a master */
1411 nwritten = objlen - c->sentlen;
1412 } else {
1413 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
1414 if (nwritten <= 0) break;
1415 }
1416 c->sentlen += nwritten;
1417 totwritten += nwritten;
1418 /* If we fully sent the object on head go to the next one */
1419 if (c->sentlen == objlen) {
1420 listDelNode(c->reply,listFirst(c->reply));
1421 c->sentlen = 0;
1422 }
1423 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
1424 * bytes, in a single threaded server it's a good idea to serve
1425 * other clients as well, even if a very large request comes from
1426 * super fast link that is always able to accept data (in real world
1427 * scenario think about 'KEYS *' against the loopback interfae) */
1428 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
1429 }
1430 if (nwritten == -1) {
1431 if (errno == EAGAIN) {
1432 nwritten = 0;
1433 } else {
1434 redisLog(REDIS_DEBUG,
1435 "Error writing to client: %s", strerror(errno));
1436 freeClient(c);
1437 return;
1438 }
1439 }
1440 if (totwritten > 0) c->lastinteraction = time(NULL);
1441 if (listLength(c->reply) == 0) {
1442 c->sentlen = 0;
1443 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1444 }
1445 }
1446
1447 static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
1448 {
1449 redisClient *c = privdata;
1450 int nwritten = 0, totwritten = 0, objlen, willwrite;
1451 robj *o;
1452 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
1453 int offset, ion = 0;
1454 REDIS_NOTUSED(el);
1455 REDIS_NOTUSED(mask);
1456
1457 listNode *node;
1458 while (listLength(c->reply)) {
1459 offset = c->sentlen;
1460 ion = 0;
1461 willwrite = 0;
1462
1463 /* fill-in the iov[] array */
1464 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
1465 o = listNodeValue(node);
1466 objlen = sdslen(o->ptr);
1467
1468 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
1469 break;
1470
1471 if(ion == REDIS_WRITEV_IOVEC_COUNT)
1472 break; /* no more iovecs */
1473
1474 iov[ion].iov_base = ((char*)o->ptr) + offset;
1475 iov[ion].iov_len = objlen - offset;
1476 willwrite += objlen - offset;
1477 offset = 0; /* just for the first item */
1478 ion++;
1479 }
1480
1481 if(willwrite == 0)
1482 break;
1483
1484 /* write all collected blocks at once */
1485 if((nwritten = writev(fd, iov, ion)) < 0) {
1486 if (errno != EAGAIN) {
1487 redisLog(REDIS_DEBUG,
1488 "Error writing to client: %s", strerror(errno));
1489 freeClient(c);
1490 return;
1491 }
1492 break;
1493 }
1494
1495 totwritten += nwritten;
1496 offset = c->sentlen;
1497
1498 /* remove written robjs from c->reply */
1499 while (nwritten && listLength(c->reply)) {
1500 o = listNodeValue(listFirst(c->reply));
1501 objlen = sdslen(o->ptr);
1502
1503 if(nwritten >= objlen - offset) {
1504 listDelNode(c->reply, listFirst(c->reply));
1505 nwritten -= objlen - offset;
1506 c->sentlen = 0;
1507 } else {
1508 /* partial write */
1509 c->sentlen += nwritten;
1510 break;
1511 }
1512 offset = 0;
1513 }
1514 }
1515
1516 if (totwritten > 0)
1517 c->lastinteraction = time(NULL);
1518
1519 if (listLength(c->reply) == 0) {
1520 c->sentlen = 0;
1521 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1522 }
1523 }
1524
1525 static struct redisCommand *lookupCommand(char *name) {
1526 int j = 0;
1527 while(cmdTable[j].name != NULL) {
1528 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
1529 j++;
1530 }
1531 return NULL;
1532 }
1533
1534 /* resetClient prepare the client to process the next command */
1535 static void resetClient(redisClient *c) {
1536 freeClientArgv(c);
1537 c->bulklen = -1;
1538 c->multibulk = 0;
1539 }
1540
1541 /* If this function gets called we already read a whole
1542 * command, argments are in the client argv/argc fields.
1543 * processCommand() execute the command or prepare the
1544 * server for a bulk read from the client.
1545 *
1546 * If 1 is returned the client is still alive and valid and
1547 * and other operations can be performed by the caller. Otherwise
1548 * if 0 is returned the client was destroied (i.e. after QUIT). */
1549 static int processCommand(redisClient *c) {
1550 struct redisCommand *cmd;
1551 long long dirty;
1552
1553 /* Free some memory if needed (maxmemory setting) */
1554 if (server.maxmemory) freeMemoryIfNeeded();
1555
1556 /* Handle the multi bulk command type. This is an alternative protocol
1557 * supported by Redis in order to receive commands that are composed of
1558 * multiple binary-safe "bulk" arguments. The latency of processing is
1559 * a bit higher but this allows things like multi-sets, so if this
1560 * protocol is used only for MSET and similar commands this is a big win. */
1561 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1562 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1563 if (c->multibulk <= 0) {
1564 resetClient(c);
1565 return 1;
1566 } else {
1567 decrRefCount(c->argv[c->argc-1]);
1568 c->argc--;
1569 return 1;
1570 }
1571 } else if (c->multibulk) {
1572 if (c->bulklen == -1) {
1573 if (((char*)c->argv[0]->ptr)[0] != '$') {
1574 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1575 resetClient(c);
1576 return 1;
1577 } else {
1578 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1579 decrRefCount(c->argv[0]);
1580 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1581 c->argc--;
1582 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1583 resetClient(c);
1584 return 1;
1585 }
1586 c->argc--;
1587 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1588 return 1;
1589 }
1590 } else {
1591 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1592 c->mbargv[c->mbargc] = c->argv[0];
1593 c->mbargc++;
1594 c->argc--;
1595 c->multibulk--;
1596 if (c->multibulk == 0) {
1597 robj **auxargv;
1598 int auxargc;
1599
1600 /* Here we need to swap the multi-bulk argc/argv with the
1601 * normal argc/argv of the client structure. */
1602 auxargv = c->argv;
1603 c->argv = c->mbargv;
1604 c->mbargv = auxargv;
1605
1606 auxargc = c->argc;
1607 c->argc = c->mbargc;
1608 c->mbargc = auxargc;
1609
1610 /* We need to set bulklen to something different than -1
1611 * in order for the code below to process the command without
1612 * to try to read the last argument of a bulk command as
1613 * a special argument. */
1614 c->bulklen = 0;
1615 /* continue below and process the command */
1616 } else {
1617 c->bulklen = -1;
1618 return 1;
1619 }
1620 }
1621 }
1622 /* -- end of multi bulk commands processing -- */
1623
1624 /* The QUIT command is handled as a special case. Normal command
1625 * procs are unable to close the client connection safely */
1626 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1627 freeClient(c);
1628 return 0;
1629 }
1630 cmd = lookupCommand(c->argv[0]->ptr);
1631 if (!cmd) {
1632 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1633 resetClient(c);
1634 return 1;
1635 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1636 (c->argc < -cmd->arity)) {
1637 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
1638 resetClient(c);
1639 return 1;
1640 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1641 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1642 resetClient(c);
1643 return 1;
1644 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1645 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1646
1647 decrRefCount(c->argv[c->argc-1]);
1648 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1649 c->argc--;
1650 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1651 resetClient(c);
1652 return 1;
1653 }
1654 c->argc--;
1655 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1656 /* It is possible that the bulk read is already in the
1657 * buffer. Check this condition and handle it accordingly.
1658 * This is just a fast path, alternative to call processInputBuffer().
1659 * It's a good idea since the code is small and this condition
1660 * happens most of the times. */
1661 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1662 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1663 c->argc++;
1664 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1665 } else {
1666 return 1;
1667 }
1668 }
1669 /* Let's try to share objects on the command arguments vector */
1670 if (server.shareobjects) {
1671 int j;
1672 for(j = 1; j < c->argc; j++)
1673 c->argv[j] = tryObjectSharing(c->argv[j]);
1674 }
1675 /* Let's try to encode the bulk object to save space. */
1676 if (cmd->flags & REDIS_CMD_BULK)
1677 tryObjectEncoding(c->argv[c->argc-1]);
1678
1679 /* Check if the user is authenticated */
1680 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1681 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1682 resetClient(c);
1683 return 1;
1684 }
1685
1686 /* Exec the command */
1687 dirty = server.dirty;
1688 cmd->proc(c);
1689 if (server.appendonly && server.dirty-dirty)
1690 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
1691 if (server.dirty-dirty && listLength(server.slaves))
1692 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
1693 if (listLength(server.monitors))
1694 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
1695 server.stat_numcommands++;
1696
1697 /* Prepare the client for the next command */
1698 if (c->flags & REDIS_CLOSE) {
1699 freeClient(c);
1700 return 0;
1701 }
1702 resetClient(c);
1703 return 1;
1704 }
1705
1706 static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
1707 listNode *ln;
1708 int outc = 0, j;
1709 robj **outv;
1710 /* (args*2)+1 is enough room for args, spaces, newlines */
1711 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1712
1713 if (argc <= REDIS_STATIC_ARGS) {
1714 outv = static_outv;
1715 } else {
1716 outv = zmalloc(sizeof(robj*)*(argc*2+1));
1717 }
1718
1719 for (j = 0; j < argc; j++) {
1720 if (j != 0) outv[outc++] = shared.space;
1721 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1722 robj *lenobj;
1723
1724 lenobj = createObject(REDIS_STRING,
1725 sdscatprintf(sdsempty(),"%d\r\n",
1726 stringObjectLen(argv[j])));
1727 lenobj->refcount = 0;
1728 outv[outc++] = lenobj;
1729 }
1730 outv[outc++] = argv[j];
1731 }
1732 outv[outc++] = shared.crlf;
1733
1734 /* Increment all the refcounts at start and decrement at end in order to
1735 * be sure to free objects if there is no slave in a replication state
1736 * able to be feed with commands */
1737 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
1738 listRewind(slaves);
1739 while((ln = listYield(slaves))) {
1740 redisClient *slave = ln->value;
1741
1742 /* Don't feed slaves that are still waiting for BGSAVE to start */
1743 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
1744
1745 /* Feed all the other slaves, MONITORs and so on */
1746 if (slave->slaveseldb != dictid) {
1747 robj *selectcmd;
1748
1749 switch(dictid) {
1750 case 0: selectcmd = shared.select0; break;
1751 case 1: selectcmd = shared.select1; break;
1752 case 2: selectcmd = shared.select2; break;
1753 case 3: selectcmd = shared.select3; break;
1754 case 4: selectcmd = shared.select4; break;
1755 case 5: selectcmd = shared.select5; break;
1756 case 6: selectcmd = shared.select6; break;
1757 case 7: selectcmd = shared.select7; break;
1758 case 8: selectcmd = shared.select8; break;
1759 case 9: selectcmd = shared.select9; break;
1760 default:
1761 selectcmd = createObject(REDIS_STRING,
1762 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1763 selectcmd->refcount = 0;
1764 break;
1765 }
1766 addReply(slave,selectcmd);
1767 slave->slaveseldb = dictid;
1768 }
1769 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
1770 }
1771 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
1772 if (outv != static_outv) zfree(outv);
1773 }
1774
1775 static void processInputBuffer(redisClient *c) {
1776 again:
1777 if (c->bulklen == -1) {
1778 /* Read the first line of the query */
1779 char *p = strchr(c->querybuf,'\n');
1780 size_t querylen;
1781
1782 if (p) {
1783 sds query, *argv;
1784 int argc, j;
1785
1786 query = c->querybuf;
1787 c->querybuf = sdsempty();
1788 querylen = 1+(p-(query));
1789 if (sdslen(query) > querylen) {
1790 /* leave data after the first line of the query in the buffer */
1791 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1792 }
1793 *p = '\0'; /* remove "\n" */
1794 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1795 sdsupdatelen(query);
1796
1797 /* Now we can split the query in arguments */
1798 if (sdslen(query) == 0) {
1799 /* Ignore empty query */
1800 sdsfree(query);
1801 return;
1802 }
1803 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
1804 sdsfree(query);
1805
1806 if (c->argv) zfree(c->argv);
1807 c->argv = zmalloc(sizeof(robj*)*argc);
1808
1809 for (j = 0; j < argc; j++) {
1810 if (sdslen(argv[j])) {
1811 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1812 c->argc++;
1813 } else {
1814 sdsfree(argv[j]);
1815 }
1816 }
1817 zfree(argv);
1818 /* Execute the command. If the client is still valid
1819 * after processCommand() return and there is something
1820 * on the query buffer try to process the next command. */
1821 if (c->argc && processCommand(c) && sdslen(c->querybuf)) goto again;
1822 return;
1823 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
1824 redisLog(REDIS_DEBUG, "Client protocol error");
1825 freeClient(c);
1826 return;
1827 }
1828 } else {
1829 /* Bulk read handling. Note that if we are at this point
1830 the client already sent a command terminated with a newline,
1831 we are reading the bulk data that is actually the last
1832 argument of the command. */
1833 int qbl = sdslen(c->querybuf);
1834
1835 if (c->bulklen <= qbl) {
1836 /* Copy everything but the final CRLF as final argument */
1837 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1838 c->argc++;
1839 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1840 /* Process the command. If the client is still valid after
1841 * the processing and there is more data in the buffer
1842 * try to parse it. */
1843 if (processCommand(c) && sdslen(c->querybuf)) goto again;
1844 return;
1845 }
1846 }
1847 }
1848
1849 static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1850 redisClient *c = (redisClient*) privdata;
1851 char buf[REDIS_IOBUF_LEN];
1852 int nread;
1853 REDIS_NOTUSED(el);
1854 REDIS_NOTUSED(mask);
1855
1856 nread = read(fd, buf, REDIS_IOBUF_LEN);
1857 if (nread == -1) {
1858 if (errno == EAGAIN) {
1859 nread = 0;
1860 } else {
1861 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1862 freeClient(c);
1863 return;
1864 }
1865 } else if (nread == 0) {
1866 redisLog(REDIS_DEBUG, "Client closed connection");
1867 freeClient(c);
1868 return;
1869 }
1870 if (nread) {
1871 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1872 c->lastinteraction = time(NULL);
1873 } else {
1874 return;
1875 }
1876 processInputBuffer(c);
1877 }
1878
1879 static int selectDb(redisClient *c, int id) {
1880 if (id < 0 || id >= server.dbnum)
1881 return REDIS_ERR;
1882 c->db = &server.db[id];
1883 return REDIS_OK;
1884 }
1885
1886 static void *dupClientReplyValue(void *o) {
1887 incrRefCount((robj*)o);
1888 return 0;
1889 }
1890
1891 static redisClient *createClient(int fd) {
1892 redisClient *c = zmalloc(sizeof(*c));
1893
1894 anetNonBlock(NULL,fd);
1895 anetTcpNoDelay(NULL,fd);
1896 if (!c) return NULL;
1897 selectDb(c,0);
1898 c->fd = fd;
1899 c->querybuf = sdsempty();
1900 c->argc = 0;
1901 c->argv = NULL;
1902 c->bulklen = -1;
1903 c->multibulk = 0;
1904 c->mbargc = 0;
1905 c->mbargv = NULL;
1906 c->sentlen = 0;
1907 c->flags = 0;
1908 c->lastinteraction = time(NULL);
1909 c->authenticated = 0;
1910 c->replstate = REDIS_REPL_NONE;
1911 c->reply = listCreate();
1912 listSetFreeMethod(c->reply,decrRefCount);
1913 listSetDupMethod(c->reply,dupClientReplyValue);
1914 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1915 readQueryFromClient, c) == AE_ERR) {
1916 freeClient(c);
1917 return NULL;
1918 }
1919 listAddNodeTail(server.clients,c);
1920 return c;
1921 }
1922
1923 static void addReply(redisClient *c, robj *obj) {
1924 if (listLength(c->reply) == 0 &&
1925 (c->replstate == REDIS_REPL_NONE ||
1926 c->replstate == REDIS_REPL_ONLINE) &&
1927 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
1928 sendReplyToClient, c) == AE_ERR) return;
1929 if (obj->encoding != REDIS_ENCODING_RAW) {
1930 obj = getDecodedObject(obj);
1931 } else {
1932 incrRefCount(obj);
1933 }
1934 listAddNodeTail(c->reply,obj);
1935 }
1936
1937 static void addReplySds(redisClient *c, sds s) {
1938 robj *o = createObject(REDIS_STRING,s);
1939 addReply(c,o);
1940 decrRefCount(o);
1941 }
1942
1943 static void addReplyDouble(redisClient *c, double d) {
1944 char buf[128];
1945
1946 snprintf(buf,sizeof(buf),"%.17g",d);
1947 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
1948 strlen(buf),buf));
1949 }
1950
1951 static void addReplyBulkLen(redisClient *c, robj *obj) {
1952 size_t len;
1953
1954 if (obj->encoding == REDIS_ENCODING_RAW) {
1955 len = sdslen(obj->ptr);
1956 } else {
1957 long n = (long)obj->ptr;
1958
1959 len = 1;
1960 if (n < 0) {
1961 len++;
1962 n = -n;
1963 }
1964 while((n = n/10) != 0) {
1965 len++;
1966 }
1967 }
1968 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",len));
1969 }
1970
1971 static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
1972 int cport, cfd;
1973 char cip[128];
1974 redisClient *c;
1975 REDIS_NOTUSED(el);
1976 REDIS_NOTUSED(mask);
1977 REDIS_NOTUSED(privdata);
1978
1979 cfd = anetAccept(server.neterr, fd, cip, &cport);
1980 if (cfd == AE_ERR) {
1981 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
1982 return;
1983 }
1984 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
1985 if ((c = createClient(cfd)) == NULL) {
1986 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
1987 close(cfd); /* May be already closed, just ingore errors */
1988 return;
1989 }
1990 /* If maxclient directive is set and this is one client more... close the
1991 * connection. Note that we create the client instead to check before
1992 * for this condition, since now the socket is already set in nonblocking
1993 * mode and we can send an error for free using the Kernel I/O */
1994 if (server.maxclients && listLength(server.clients) > server.maxclients) {
1995 char *err = "-ERR max number of clients reached\r\n";
1996
1997 /* That's a best effort error message, don't check write errors */
1998 if (write(c->fd,err,strlen(err)) == -1) {
1999 /* Nothing to do, Just to avoid the warning... */
2000 }
2001 freeClient(c);
2002 return;
2003 }
2004 server.stat_numconnections++;
2005 }
2006
2007 /* ======================= Redis objects implementation ===================== */
2008
2009 static robj *createObject(int type, void *ptr) {
2010 robj *o;
2011
2012 if (listLength(server.objfreelist)) {
2013 listNode *head = listFirst(server.objfreelist);
2014 o = listNodeValue(head);
2015 listDelNode(server.objfreelist,head);
2016 } else {
2017 o = zmalloc(sizeof(*o));
2018 }
2019 o->type = type;
2020 o->encoding = REDIS_ENCODING_RAW;
2021 o->ptr = ptr;
2022 o->refcount = 1;
2023 return o;
2024 }
2025
2026 static robj *createStringObject(char *ptr, size_t len) {
2027 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
2028 }
2029
2030 static robj *createListObject(void) {
2031 list *l = listCreate();
2032
2033 listSetFreeMethod(l,decrRefCount);
2034 return createObject(REDIS_LIST,l);
2035 }
2036
2037 static robj *createSetObject(void) {
2038 dict *d = dictCreate(&setDictType,NULL);
2039 return createObject(REDIS_SET,d);
2040 }
2041
2042 static robj *createZsetObject(void) {
2043 zset *zs = zmalloc(sizeof(*zs));
2044
2045 zs->dict = dictCreate(&zsetDictType,NULL);
2046 zs->zsl = zslCreate();
2047 return createObject(REDIS_ZSET,zs);
2048 }
2049
2050 static void freeStringObject(robj *o) {
2051 if (o->encoding == REDIS_ENCODING_RAW) {
2052 sdsfree(o->ptr);
2053 }
2054 }
2055
2056 static void freeListObject(robj *o) {
2057 listRelease((list*) o->ptr);
2058 }
2059
2060 static void freeSetObject(robj *o) {
2061 dictRelease((dict*) o->ptr);
2062 }
2063
2064 static void freeZsetObject(robj *o) {
2065 zset *zs = o->ptr;
2066
2067 dictRelease(zs->dict);
2068 zslFree(zs->zsl);
2069 zfree(zs);
2070 }
2071
2072 static void freeHashObject(robj *o) {
2073 dictRelease((dict*) o->ptr);
2074 }
2075
2076 static void incrRefCount(robj *o) {
2077 o->refcount++;
2078 #ifdef DEBUG_REFCOUNT
2079 if (o->type == REDIS_STRING)
2080 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
2081 #endif
2082 }
2083
2084 static void decrRefCount(void *obj) {
2085 robj *o = obj;
2086
2087 #ifdef DEBUG_REFCOUNT
2088 if (o->type == REDIS_STRING)
2089 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
2090 #endif
2091 if (--(o->refcount) == 0) {
2092 switch(o->type) {
2093 case REDIS_STRING: freeStringObject(o); break;
2094 case REDIS_LIST: freeListObject(o); break;
2095 case REDIS_SET: freeSetObject(o); break;
2096 case REDIS_ZSET: freeZsetObject(o); break;
2097 case REDIS_HASH: freeHashObject(o); break;
2098 default: assert(0 != 0); break;
2099 }
2100 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2101 !listAddNodeHead(server.objfreelist,o))
2102 zfree(o);
2103 }
2104 }
2105
2106 static robj *lookupKey(redisDb *db, robj *key) {
2107 dictEntry *de = dictFind(db->dict,key);
2108 return de ? dictGetEntryVal(de) : NULL;
2109 }
2110
2111 static robj *lookupKeyRead(redisDb *db, robj *key) {
2112 expireIfNeeded(db,key);
2113 return lookupKey(db,key);
2114 }
2115
2116 static robj *lookupKeyWrite(redisDb *db, robj *key) {
2117 deleteIfVolatile(db,key);
2118 return lookupKey(db,key);
2119 }
2120
2121 static int deleteKey(redisDb *db, robj *key) {
2122 int retval;
2123
2124 /* We need to protect key from destruction: after the first dictDelete()
2125 * it may happen that 'key' is no longer valid if we don't increment
2126 * it's count. This may happen when we get the object reference directly
2127 * from the hash table with dictRandomKey() or dict iterators */
2128 incrRefCount(key);
2129 if (dictSize(db->expires)) dictDelete(db->expires,key);
2130 retval = dictDelete(db->dict,key);
2131 decrRefCount(key);
2132
2133 return retval == DICT_OK;
2134 }
2135
2136 /* Try to share an object against the shared objects pool */
2137 static robj *tryObjectSharing(robj *o) {
2138 struct dictEntry *de;
2139 unsigned long c;
2140
2141 if (o == NULL || server.shareobjects == 0) return o;
2142
2143 assert(o->type == REDIS_STRING);
2144 de = dictFind(server.sharingpool,o);
2145 if (de) {
2146 robj *shared = dictGetEntryKey(de);
2147
2148 c = ((unsigned long) dictGetEntryVal(de))+1;
2149 dictGetEntryVal(de) = (void*) c;
2150 incrRefCount(shared);
2151 decrRefCount(o);
2152 return shared;
2153 } else {
2154 /* Here we are using a stream algorihtm: Every time an object is
2155 * shared we increment its count, everytime there is a miss we
2156 * recrement the counter of a random object. If this object reaches
2157 * zero we remove the object and put the current object instead. */
2158 if (dictSize(server.sharingpool) >=
2159 server.sharingpoolsize) {
2160 de = dictGetRandomKey(server.sharingpool);
2161 assert(de != NULL);
2162 c = ((unsigned long) dictGetEntryVal(de))-1;
2163 dictGetEntryVal(de) = (void*) c;
2164 if (c == 0) {
2165 dictDelete(server.sharingpool,de->key);
2166 }
2167 } else {
2168 c = 0; /* If the pool is empty we want to add this object */
2169 }
2170 if (c == 0) {
2171 int retval;
2172
2173 retval = dictAdd(server.sharingpool,o,(void*)1);
2174 assert(retval == DICT_OK);
2175 incrRefCount(o);
2176 }
2177 return o;
2178 }
2179 }
2180
2181 /* Check if the nul-terminated string 's' can be represented by a long
2182 * (that is, is a number that fits into long without any other space or
2183 * character before or after the digits).
2184 *
2185 * If so, the function returns REDIS_OK and *longval is set to the value
2186 * of the number. Otherwise REDIS_ERR is returned */
2187 static int isStringRepresentableAsLong(sds s, long *longval) {
2188 char buf[32], *endptr;
2189 long value;
2190 int slen;
2191
2192 value = strtol(s, &endptr, 10);
2193 if (endptr[0] != '\0') return REDIS_ERR;
2194 slen = snprintf(buf,32,"%ld",value);
2195
2196 /* If the number converted back into a string is not identical
2197 * then it's not possible to encode the string as integer */
2198 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
2199 if (longval) *longval = value;
2200 return REDIS_OK;
2201 }
2202
2203 /* Try to encode a string object in order to save space */
2204 static int tryObjectEncoding(robj *o) {
2205 long value;
2206 sds s = o->ptr;
2207
2208 if (o->encoding != REDIS_ENCODING_RAW)
2209 return REDIS_ERR; /* Already encoded */
2210
2211 /* It's not save to encode shared objects: shared objects can be shared
2212 * everywhere in the "object space" of Redis. Encoded objects can only
2213 * appear as "values" (and not, for instance, as keys) */
2214 if (o->refcount > 1) return REDIS_ERR;
2215
2216 /* Currently we try to encode only strings */
2217 assert(o->type == REDIS_STRING);
2218
2219 /* Check if we can represent this string as a long integer */
2220 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
2221
2222 /* Ok, this object can be encoded */
2223 o->encoding = REDIS_ENCODING_INT;
2224 sdsfree(o->ptr);
2225 o->ptr = (void*) value;
2226 return REDIS_OK;
2227 }
2228
2229 /* Get a decoded version of an encoded object (returned as a new object) */
2230 static robj *getDecodedObject(const robj *o) {
2231 robj *dec;
2232
2233 assert(o->encoding != REDIS_ENCODING_RAW);
2234 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2235 char buf[32];
2236
2237 snprintf(buf,32,"%ld",(long)o->ptr);
2238 dec = createStringObject(buf,strlen(buf));
2239 return dec;
2240 } else {
2241 assert(1 != 1);
2242 }
2243 }
2244
2245 /* Compare two string objects via strcmp() or alike.
2246 * Note that the objects may be integer-encoded. In such a case we
2247 * use snprintf() to get a string representation of the numbers on the stack
2248 * and compare the strings, it's much faster than calling getDecodedObject(). */
2249 static int compareStringObjects(robj *a, robj *b) {
2250 assert(a->type == REDIS_STRING && b->type == REDIS_STRING);
2251 char bufa[128], bufb[128], *astr, *bstr;
2252 int bothsds = 1;
2253
2254 if (a == b) return 0;
2255 if (a->encoding != REDIS_ENCODING_RAW) {
2256 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2257 astr = bufa;
2258 bothsds = 0;
2259 } else {
2260 astr = a->ptr;
2261 }
2262 if (b->encoding != REDIS_ENCODING_RAW) {
2263 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2264 bstr = bufb;
2265 bothsds = 0;
2266 } else {
2267 bstr = b->ptr;
2268 }
2269 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
2270 }
2271
2272 static size_t stringObjectLen(robj *o) {
2273 assert(o->type == REDIS_STRING);
2274 if (o->encoding == REDIS_ENCODING_RAW) {
2275 return sdslen(o->ptr);
2276 } else {
2277 char buf[32];
2278
2279 return snprintf(buf,32,"%ld",(long)o->ptr);
2280 }
2281 }
2282
2283 /*============================ DB saving/loading ============================ */
2284
/* Write the one byte 'type' code to 'fp'.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
2289
/* Write the time 't' to 'fp' as a 32 bit signed integer in the byte order
 * of the host. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t secs = (int32_t) t;

    return (fwrite(&secs,4,1,fp) == 0) ? -1 : 0;
}
2295
2296 /* check rdbLoadLen() comments for more info */
2297 static int rdbSaveLen(FILE *fp, uint32_t len) {
2298 unsigned char buf[2];
2299
2300 if (len < (1<<6)) {
2301 /* Save a 6 bit len */
2302 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
2303 if (fwrite(buf,1,1,fp) == 0) return -1;
2304 } else if (len < (1<<14)) {
2305 /* Save a 14 bit len */
2306 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
2307 buf[1] = len&0xFF;
2308 if (fwrite(buf,2,1,fp) == 0) return -1;
2309 } else {
2310 /* Save a 32 bit len */
2311 buf[0] = (REDIS_RDB_32BITLEN<<6);
2312 if (fwrite(buf,1,1,fp) == 0) return -1;
2313 len = htonl(len);
2314 if (fwrite(&len,4,1,fp) == 0) return -1;
2315 }
2316 return 0;
2317 }
2318
2319 /* String objects in the form "2391" "-100" without any space and with a
2320 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2321 * encoded as integers to save space */
2322 static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
2323 long long value;
2324 char *endptr, buf[32];
2325
2326 /* Check if it's possible to encode this value as a number */
2327 value = strtoll(s, &endptr, 10);
2328 if (endptr[0] != '\0') return 0;
2329 snprintf(buf,32,"%lld",value);
2330
2331 /* If the number converted back into a string is not identical
2332 * then it's not possible to encode the string as integer */
2333 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2334
2335 /* Finally check if it fits in our ranges */
2336 if (value >= -(1<<7) && value <= (1<<7)-1) {
2337 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2338 enc[1] = value&0xFF;
2339 return 2;
2340 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2341 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2342 enc[1] = value&0xFF;
2343 enc[2] = (value>>8)&0xFF;
2344 return 3;
2345 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2346 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2347 enc[1] = value&0xFF;
2348 enc[2] = (value>>8)&0xFF;
2349 enc[3] = (value>>16)&0xFF;
2350 enc[4] = (value>>24)&0xFF;
2351 return 5;
2352 } else {
2353 return 0;
2354 }
2355 }
2356
2357 static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2358 unsigned int comprlen, outlen;
2359 unsigned char byte;
2360 void *out;
2361
2362 /* We require at least four bytes compression for this to be worth it */
2363 outlen = sdslen(obj->ptr)-4;
2364 if (outlen <= 0) return 0;
2365 if ((out = zmalloc(outlen+1)) == NULL) return 0;
2366 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2367 if (comprlen == 0) {
2368 zfree(out);
2369 return 0;
2370 }
2371 /* Data compressed! Let's save it on disk */
2372 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2373 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2374 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2375 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2376 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
2377 zfree(out);
2378 return comprlen;
2379
2380 writeerr:
2381 zfree(out);
2382 return -1;
2383 }
2384
2385 /* Save a string objet as [len][data] on disk. If the object is a string
2386 * representation of an integer value we try to safe it in a special form */
2387 static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2388 size_t len;
2389 int enclen;
2390
2391 len = sdslen(obj->ptr);
2392
2393 /* Try integer encoding */
2394 if (len <= 11) {
2395 unsigned char buf[5];
2396 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2397 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2398 return 0;
2399 }
2400 }
2401
2402 /* Try LZF compression - under 20 bytes it's unable to compress even
2403 * aaaaaaaaaaaaaaaaaa so skip it */
2404 if (len > 20) {
2405 int retval;
2406
2407 retval = rdbSaveLzfStringObject(fp,obj);
2408 if (retval == -1) return -1;
2409 if (retval > 0) return 0;
2410 /* retval == 0 means data can't be compressed, save the old way */
2411 }
2412
2413 /* Store verbatim */
2414 if (rdbSaveLen(fp,len) == -1) return -1;
2415 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2416 return 0;
2417 }
2418
2419 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
2420 static int rdbSaveStringObject(FILE *fp, robj *obj) {
2421 int retval;
2422 robj *dec;
2423
2424 if (obj->encoding != REDIS_ENCODING_RAW) {
2425 dec = getDecodedObject(obj);
2426 retval = rdbSaveStringObjectRaw(fp,dec);
2427 decrRefCount(dec);
2428 return retval;
2429 } else {
2430 return rdbSaveStringObjectRaw(fp,obj);
2431 }
2432 }
2433
/* Save a double value. Doubles are written as their "%.17g" string
 * representation prefixed by an unsigned 8 bit integer holding the length
 * of the string. Three values of the prefix are reserved and are written
 * alone, without any string following:
 *
 *   253: not a number
 *   254: + inf
 *   255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len = 1; /* the special values take just the prefix byte */

    if (isnan(val)) {
        buf[0] = 253;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        char *text = (char*)buf+1;

        snprintf(text,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen(text);
        len = buf[0]+1;
    }
    return (fwrite(buf,len,1,fp) == 0) ? -1 : 0;
}
2460
2461 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
2462 static int rdbSave(char *filename) {
2463 dictIterator *di = NULL;
2464 dictEntry *de;
2465 FILE *fp;
2466 char tmpfile[256];
2467 int j;
2468 time_t now = time(NULL);
2469
2470 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
2471 fp = fopen(tmpfile,"w");
2472 if (!fp) {
2473 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2474 return REDIS_ERR;
2475 }
2476 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
2477 for (j = 0; j < server.dbnum; j++) {
2478 redisDb *db = server.db+j;
2479 dict *d = db->dict;
2480 if (dictSize(d) == 0) continue;
2481 di = dictGetIterator(d);
2482 if (!di) {
2483 fclose(fp);
2484 return REDIS_ERR;
2485 }
2486
2487 /* Write the SELECT DB opcode */
2488 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2489 if (rdbSaveLen(fp,j) == -1) goto werr;
2490
2491 /* Iterate this DB writing every entry */
2492 while((de = dictNext(di)) != NULL) {
2493 robj *key = dictGetEntryKey(de);
2494 robj *o = dictGetEntryVal(de);
2495 time_t expiretime = getExpire(db,key);
2496
2497 /* Save the expire time */
2498 if (expiretime != -1) {
2499 /* If this key is already expired skip it */
2500 if (expiretime < now) continue;
2501 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2502 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2503 }
2504 /* Save the key and associated value */
2505 if (rdbSaveType(fp,o->type) == -1) goto werr;
2506 if (rdbSaveStringObject(fp,key) == -1) goto werr;
2507 if (o->type == REDIS_STRING) {
2508 /* Save a string value */
2509 if (rdbSaveStringObject(fp,o) == -1) goto werr;
2510 } else if (o->type == REDIS_LIST) {
2511 /* Save a list value */
2512 list *list = o->ptr;
2513 listNode *ln;
2514
2515 listRewind(list);
2516 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
2517 while((ln = listYield(list))) {
2518 robj *eleobj = listNodeValue(ln);
2519
2520 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2521 }
2522 } else if (o->type == REDIS_SET) {
2523 /* Save a set value */
2524 dict *set = o->ptr;
2525 dictIterator *di = dictGetIterator(set);
2526 dictEntry *de;
2527
2528 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
2529 while((de = dictNext(di)) != NULL) {
2530 robj *eleobj = dictGetEntryKey(de);
2531
2532 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2533 }
2534 dictReleaseIterator(di);
2535 } else if (o->type == REDIS_ZSET) {
2536 /* Save a set value */
2537 zset *zs = o->ptr;
2538 dictIterator *di = dictGetIterator(zs->dict);
2539 dictEntry *de;
2540
2541 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
2542 while((de = dictNext(di)) != NULL) {
2543 robj *eleobj = dictGetEntryKey(de);
2544 double *score = dictGetEntryVal(de);
2545
2546 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2547 if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
2548 }
2549 dictReleaseIterator(di);
2550 } else {
2551 assert(0 != 0);
2552 }
2553 }
2554 dictReleaseIterator(di);
2555 }
2556 /* EOF opcode */
2557 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2558
2559 /* Make sure data will not remain on the OS's output buffers */
2560 fflush(fp);
2561 fsync(fileno(fp));
2562 fclose(fp);
2563
2564 /* Use RENAME to make sure the DB file is changed atomically only
2565 * if the generate DB file is ok. */
2566 if (rename(tmpfile,filename) == -1) {
2567 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
2568 unlink(tmpfile);
2569 return REDIS_ERR;
2570 }
2571 redisLog(REDIS_NOTICE,"DB saved on disk");
2572 server.dirty = 0;
2573 server.lastsave = time(NULL);
2574 return REDIS_OK;
2575
2576 werr:
2577 fclose(fp);
2578 unlink(tmpfile);
2579 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2580 if (di) dictReleaseIterator(di);
2581 return REDIS_ERR;
2582 }
2583
2584 static int rdbSaveBackground(char *filename) {
2585 pid_t childpid;
2586
2587 if (server.bgsaveinprogress) return REDIS_ERR;
2588 if ((childpid = fork()) == 0) {
2589 /* Child */
2590 close(server.fd);
2591 if (rdbSave(filename) == REDIS_OK) {
2592 exit(0);
2593 } else {
2594 exit(1);
2595 }
2596 } else {
2597 /* Parent */
2598 if (childpid == -1) {
2599 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2600 strerror(errno));
2601 return REDIS_ERR;
2602 }
2603 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
2604 server.bgsaveinprogress = 1;
2605 server.bgsavechildpid = childpid;
2606 return REDIS_OK;
2607 }
2608 return REDIS_OK; /* unreached */
2609 }
2610
/* Remove the temporary dump file "temp-<pid>.rdb" belonging to the
 * process 'childpid'. Errors from unlink() are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
2617
/* Read a one byte type code from 'fp'.
 * Returns the code (0-255), or -1 if the byte cannot be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char code;

    return (fread(&code,1,1,fp) == 0) ? -1 : code;
}
2623
/* Read a time stored by rdbSaveTime(): a 32 bit signed integer in the
 * byte order of the host. Returns the time, or -1 on short read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t secs;

    if (fread(&secs,4,1,fp) == 0) return -1;
    return (time_t) secs;
}
2629
2630 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
2631 * of this file for a description of how this are stored on disk.
2632 *
2633 * isencoded is set to 1 if the readed length is not actually a length but
2634 * an "encoding type", check the above comments for more info */
2635 static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
2636 unsigned char buf[2];
2637 uint32_t len;
2638
2639 if (isencoded) *isencoded = 0;
2640 if (rdbver == 0) {
2641 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2642 return ntohl(len);
2643 } else {
2644 int type;
2645
2646 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
2647 type = (buf[0]&0xC0)>>6;
2648 if (type == REDIS_RDB_6BITLEN) {
2649 /* Read a 6 bit len */
2650 return buf[0]&0x3F;
2651 } else if (type == REDIS_RDB_ENCVAL) {
2652 /* Read a 6 bit len encoding type */
2653 if (isencoded) *isencoded = 1;
2654 return buf[0]&0x3F;
2655 } else if (type == REDIS_RDB_14BITLEN) {
2656 /* Read a 14 bit len */
2657 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
2658 return ((buf[0]&0x3F)<<8)|buf[1];
2659 } else {
2660 /* Read a 32 bit len */
2661 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2662 return ntohl(len);
2663 }
2664 }
2665 }
2666
2667 static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
2668 unsigned char enc[4];
2669 long long val;
2670
2671 if (enctype == REDIS_RDB_ENC_INT8) {
2672 if (fread(enc,1,1,fp) == 0) return NULL;
2673 val = (signed char)enc[0];
2674 } else if (enctype == REDIS_RDB_ENC_INT16) {
2675 uint16_t v;
2676 if (fread(enc,2,1,fp) == 0) return NULL;
2677 v = enc[0]|(enc[1]<<8);
2678 val = (int16_t)v;
2679 } else if (enctype == REDIS_RDB_ENC_INT32) {
2680 uint32_t v;
2681 if (fread(enc,4,1,fp) == 0) return NULL;
2682 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2683 val = (int32_t)v;
2684 } else {
2685 val = 0; /* anti-warning */
2686 assert(0!=0);
2687 }
2688 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2689 }
2690
2691 static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2692 unsigned int len, clen;
2693 unsigned char *c = NULL;
2694 sds val = NULL;
2695
2696 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2697 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2698 if ((c = zmalloc(clen)) == NULL) goto err;
2699 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2700 if (fread(c,clen,1,fp) == 0) goto err;
2701 if (lzf_decompress(c,clen,val,len) == 0) goto err;
2702 zfree(c);
2703 return createObject(REDIS_STRING,val);
2704 err:
2705 zfree(c);
2706 sdsfree(val);
2707 return NULL;
2708 }
2709
2710 static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2711 int isencoded;
2712 uint32_t len;
2713 sds val;
2714
2715 len = rdbLoadLen(fp,rdbver,&isencoded);
2716 if (isencoded) {
2717 switch(len) {
2718 case REDIS_RDB_ENC_INT8:
2719 case REDIS_RDB_ENC_INT16:
2720 case REDIS_RDB_ENC_INT32:
2721 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
2722 case REDIS_RDB_ENC_LZF:
2723 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
2724 default:
2725 assert(0!=0);
2726 }
2727 }
2728
2729 if (len == REDIS_RDB_LENERR) return NULL;
2730 val = sdsnewlen(NULL,len);
2731 if (len && fread(val,len,1,fp) == 0) {
2732 sdsfree(val);
2733 return NULL;
2734 }
2735 return tryObjectSharing(createObject(REDIS_STRING,val));
2736 }
2737
2738 /* For information about double serialization check rdbSaveDoubleValue() */
2739 static int rdbLoadDoubleValue(FILE *fp, double *val) {
2740 char buf[128];
2741 unsigned char len;
2742
2743 if (fread(&len,1,1,fp) == 0) return -1;
2744 switch(len) {
2745 case 255: *val = R_NegInf; return 0;
2746 case 254: *val = R_PosInf; return 0;
2747 case 253: *val = R_Nan; return 0;
2748 default:
2749 if (fread(buf,len,1,fp) == 0) return -1;
2750 sscanf(buf, "%lg", val);
2751 return 0;
2752 }
2753 }
2754
/* Load the dump 'filename' into the server databases.
 *
 * Returns REDIS_OK when the whole file was loaded, REDIS_ERR when the
 * file cannot be opened, does not start with the "REDIS" signature or
 * has a format version greater than 1. Every other problem (short read,
 * duplicated key, database index out of range) is logged and terminates
 * the process with exit(1). */
static int rdbLoad(char *filename) {
    FILE *fp;
    robj *keyobj = NULL;
    uint32_t dbid;
    int type, retval, rdbver;
    /* Keys are loaded into DB 0 until a SELECT DB opcode is found */
    dict *d = server.db[0].dict;
    redisDb *db = server.db+0;
    char buf[1024];
    time_t expiretime = -1, now = time(NULL);

    fp = fopen(filename,"r");
    if (!fp) return REDIS_ERR;
    /* The header is 9 bytes: "REDIS" followed by 4 digits of version */
    if (fread(buf,9,1,fp) == 0) goto eoferr;
    buf[9] = '\0';
    if (memcmp(buf,"REDIS",5) != 0) {
        fclose(fp);
        redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
        return REDIS_ERR;
    }
    rdbver = atoi(buf+5);
    if (rdbver > 1) {
        fclose(fp);
        redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
        return REDIS_ERR;
    }
    while(1) {
        robj *o;

        /* Read type. */
        if ((type = rdbLoadType(fp)) == -1) goto eoferr;
        if (type == REDIS_EXPIRETIME) {
            /* The expire applies to the key that follows; it is kept in
             * 'expiretime' until that key has been added. */
            if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
            /* We read the time so we need to read the object type again */
            if ((type = rdbLoadType(fp)) == -1) goto eoferr;
        }
        if (type == REDIS_EOF) break;
        /* Handle SELECT DB opcode as a special case */
        if (type == REDIS_SELECTDB) {
            if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
                goto eoferr;
            if (dbid >= (unsigned)server.dbnum) {
                redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
                exit(1);
            }
            db = server.db+dbid;
            d = db->dict;
            continue;
        }
        /* Read key */
        if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;

        if (type == REDIS_STRING) {
            /* Read string value */
            if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
            tryObjectEncoding(o);
        } else if (type == REDIS_LIST || type == REDIS_SET) {
            /* Read list/set value */
            uint32_t listlen;

            if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
                goto eoferr;
            o = (type == REDIS_LIST) ? createListObject() : createSetObject();
            /* Load every single element of the list/set */
            while(listlen--) {
                robj *ele;

                if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
                tryObjectEncoding(ele);
                if (type == REDIS_LIST) {
                    listAddNodeTail((list*)o->ptr,ele);
                } else {
                    dictAdd((dict*)o->ptr,ele,NULL);
                }
            }
        } else if (type == REDIS_ZSET) {
            /* Read sorted set value */
            uint32_t zsetlen;
            zset *zs;

            if ((zsetlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
                goto eoferr;
            o = createZsetObject();
            zs = o->ptr;
            /* Load every single element of the sorted set: each element
             * is followed by its score */
            while(zsetlen--) {
                robj *ele;
                /* The score is heap allocated because it is stored as the
                 * value of the dict entry */
                double *score = zmalloc(sizeof(double));

                if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
                tryObjectEncoding(ele);
                if (rdbLoadDoubleValue(fp,score) == -1) goto eoferr;
                dictAdd(zs->dict,ele,score);
                zslInsert(zs->zsl,*score,ele);
                incrRefCount(ele); /* added to skiplist */
            }
        } else {
            assert(0 != 0);
        }
        /* Add the new object in the hash table */
        retval = dictAdd(d,keyobj,o);
        if (retval == DICT_ERR) {
            redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
            exit(1);
        }
        /* Set the expire time if needed */
        if (expiretime != -1) {
            setExpire(db,keyobj,expiretime);
            /* Delete this key if already expired */
            if (expiretime < now) deleteKey(db,keyobj);
            /* Reset it so that it does not apply to the next key */
            expiretime = -1;
        }
        keyobj = o = NULL;
    }
    fclose(fp);
    return REDIS_OK;

eoferr: /* unexpected end of file is handled here with a fatal exit */
    if (keyobj) decrRefCount(keyobj);
    redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
    exit(1);
    return REDIS_ERR; /* Just to avoid warning */
}
2877
2878 /*================================== Commands =============================== */
2879
2880 static void authCommand(redisClient *c) {
2881 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
2882 c->authenticated = 1;
2883 addReply(c,shared.ok);
2884 } else {
2885 c->authenticated = 0;
2886 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
2887 }
2888 }
2889
2890 static void pingCommand(redisClient *c) {
2891 addReply(c,shared.pong);
2892 }
2893
2894 static void echoCommand(redisClient *c) {
2895 addReplyBulkLen(c,c->argv[1]);
2896 addReply(c,c->argv[1]);
2897 addReply(c,shared.crlf);
2898 }
2899
2900 /*=================================== Strings =============================== */
2901
2902 static void setGenericCommand(redisClient *c, int nx) {
2903 int retval;
2904
2905 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
2906 if (retval == DICT_ERR) {
2907 if (!nx) {
2908 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2909 incrRefCount(c->argv[2]);
2910 } else {
2911 addReply(c,shared.czero);
2912 return;
2913 }
2914 } else {
2915 incrRefCount(c->argv[1]);
2916 incrRefCount(c->argv[2]);
2917 }
2918 server.dirty++;
2919 removeExpire(c->db,c->argv[1]);
2920 addReply(c, nx ? shared.cone : shared.ok);
2921 }
2922
2923 static void setCommand(redisClient *c) {
2924 setGenericCommand(c,0);
2925 }
2926
2927 static void setnxCommand(redisClient *c) {
2928 setGenericCommand(c,1);
2929 }
2930
2931 static void getCommand(redisClient *c) {
2932 robj *o = lookupKeyRead(c->db,c->argv[1]);
2933
2934 if (o == NULL) {
2935 addReply(c,shared.nullbulk);
2936 } else {
2937 if (o->type != REDIS_STRING) {
2938 addReply(c,shared.wrongtypeerr);
2939 } else {
2940 addReplyBulkLen(c,o);
2941 addReply(c,o);
2942 addReply(c,shared.crlf);
2943 }
2944 }
2945 }
2946
2947 static void getsetCommand(redisClient *c) {
2948 getCommand(c);
2949 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
2950 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2951 } else {
2952 incrRefCount(c->argv[1]);
2953 }
2954 incrRefCount(c->argv[2]);
2955 server.dirty++;
2956 removeExpire(c->db,c->argv[1]);
2957 }
2958
2959 static void mgetCommand(redisClient *c) {
2960 int j;
2961
2962 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
2963 for (j = 1; j < c->argc; j++) {
2964 robj *o = lookupKeyRead(c->db,c->argv[j]);
2965 if (o == NULL) {
2966 addReply(c,shared.nullbulk);
2967 } else {
2968 if (o->type != REDIS_STRING) {
2969 addReply(c,shared.nullbulk);
2970 } else {
2971 addReplyBulkLen(c,o);
2972 addReply(c,o);
2973 addReply(c,shared.crlf);
2974 }
2975 }
2976 }
2977 }
2978
2979 static void msetGenericCommand(redisClient *c, int nx) {
2980 int j;
2981
2982 if ((c->argc % 2) == 0) {
2983 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
2984 return;
2985 }
2986 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
2987 * set nothing at all if at least one already key exists. */
2988 if (nx) {
2989 for (j = 1; j < c->argc; j += 2) {
2990 if (dictFind(c->db->dict,c->argv[j]) != NULL) {
2991 addReply(c, shared.czero);
2992 return;
2993 }
2994 }
2995 }
2996
2997 for (j = 1; j < c->argc; j += 2) {
2998 int retval;
2999
3000 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
3001 if (retval == DICT_ERR) {
3002 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
3003 incrRefCount(c->argv[j+1]);
3004 } else {
3005 incrRefCount(c->argv[j]);
3006 incrRefCount(c->argv[j+1]);
3007 }
3008 removeExpire(c->db,c->argv[j]);
3009 }
3010 server.dirty += (c->argc-1)/2;
3011 addReply(c, nx ? shared.cone : shared.ok);
3012 }
3013
3014 static void msetCommand(redisClient *c) {
3015 msetGenericCommand(c,0);
3016 }
3017
3018 static void msetnxCommand(redisClient *c) {
3019 msetGenericCommand(c,1);
3020 }
3021
3022 static void incrDecrCommand(redisClient *c, long long incr) {
3023 long long value;
3024 int retval;
3025 robj *o;
3026
3027 o = lookupKeyWrite(c->db,c->argv[1]);
3028 if (o == NULL) {
3029 value = 0;
3030 } else {
3031 if (o->type != REDIS_STRING) {
3032 value = 0;
3033 } else {
3034 char *eptr;
3035
3036 if (o->encoding == REDIS_ENCODING_RAW)
3037 value = strtoll(o->ptr, &eptr, 10);
3038 else if (o->encoding == REDIS_ENCODING_INT)
3039 value = (long)o->ptr;
3040 else
3041 assert(1 != 1);
3042 }
3043 }
3044
3045 value += incr;
3046 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
3047 tryObjectEncoding(o);
3048 retval = dictAdd(c->db->dict,c->argv[1],o);
3049 if (retval == DICT_ERR) {
3050 dictReplace(c->db->dict,c->argv[1],o);
3051 removeExpire(c->db,c->argv[1]);
3052 } else {
3053 incrRefCount(c->argv[1]);
3054 }
3055 server.dirty++;
3056 addReply(c,shared.colon);
3057 addReply(c,o);
3058 addReply(c,shared.crlf);
3059 }
3060
3061 static void incrCommand(redisClient *c) {
3062 incrDecrCommand(c,1);
3063 }
3064
3065 static void decrCommand(redisClient *c) {
3066 incrDecrCommand(c,-1);
3067 }
3068
3069 static void incrbyCommand(redisClient *c) {
3070 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
3071 incrDecrCommand(c,incr);
3072 }
3073
3074 static void decrbyCommand(redisClient *c) {
3075 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
3076 incrDecrCommand(c,-incr);
3077 }
3078
3079 /* ========================= Type agnostic commands ========================= */
3080
3081 static void delCommand(redisClient *c) {
3082 int deleted = 0, j;
3083
3084 for (j = 1; j < c->argc; j++) {
3085 if (deleteKey(c->db,c->argv[j])) {
3086 server.dirty++;
3087 deleted++;
3088 }
3089 }
3090 switch(deleted) {
3091 case 0:
3092 addReply(c,shared.czero);
3093 break;
3094 case 1:
3095 addReply(c,shared.cone);
3096 break;
3097 default:
3098 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
3099 break;
3100 }
3101 }
3102
3103 static void existsCommand(redisClient *c) {
3104 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
3105 }
3106
3107 static void selectCommand(redisClient *c) {
3108 int id = atoi(c->argv[1]->ptr);
3109
3110 if (selectDb(c,id) == REDIS_ERR) {
3111 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
3112 } else {
3113 addReply(c,shared.ok);
3114 }
3115 }
3116
3117 static void randomkeyCommand(redisClient *c) {
3118 dictEntry *de;
3119
3120 while(1) {
3121 de = dictGetRandomKey(c->db->dict);
3122 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3123 }
3124 if (de == NULL) {
3125 addReply(c,shared.plus);
3126 addReply(c,shared.crlf);
3127 } else {
3128 addReply(c,shared.plus);
3129 addReply(c,dictGetEntryKey(de));
3130 addReply(c,shared.crlf);
3131 }
3132 }
3133
3134 static void keysCommand(redisClient *c) {
3135 dictIterator *di;
3136 dictEntry *de;
3137 sds pattern = c->argv[1]->ptr;
3138 int plen = sdslen(pattern);
3139 int numkeys = 0, keyslen = 0;
3140 robj *lenobj = createObject(REDIS_STRING,NULL);
3141
3142 di = dictGetIterator(c->db->dict);
3143 addReply(c,lenobj);
3144 decrRefCount(lenobj);
3145 while((de = dictNext(di)) != NULL) {
3146 robj *keyobj = dictGetEntryKey(de);
3147
3148 sds key = keyobj->ptr;
3149 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3150 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3151 if (expireIfNeeded(c->db,keyobj) == 0) {
3152 if (numkeys != 0)
3153 addReply(c,shared.space);
3154 addReply(c,keyobj);
3155 numkeys++;
3156 keyslen += sdslen(key);
3157 }
3158 }
3159 }
3160 dictReleaseIterator(di);
3161 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
3162 addReply(c,shared.crlf);
3163 }
3164
3165 static void dbsizeCommand(redisClient *c) {
3166 addReplySds(c,
3167 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
3168 }
3169
3170 static void lastsaveCommand(redisClient *c) {
3171 addReplySds(c,
3172 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
3173 }
3174
3175 static void typeCommand(redisClient *c) {
3176 robj *o;
3177 char *type;
3178
3179 o = lookupKeyRead(c->db,c->argv[1]);
3180 if (o == NULL) {
3181 type = "+none";
3182 } else {
3183 switch(o->type) {
3184 case REDIS_STRING: type = "+string"; break;
3185 case REDIS_LIST: type = "+list"; break;
3186 case REDIS_SET: type = "+set"; break;
3187 case REDIS_ZSET: type = "+zset"; break;
3188 default: type = "unknown"; break;
3189 }
3190 }
3191 addReplySds(c,sdsnew(type));
3192 addReply(c,shared.crlf);
3193 }
3194
3195 static void saveCommand(redisClient *c) {
3196 if (server.bgsaveinprogress) {
3197 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3198 return;
3199 }
3200 if (rdbSave(server.dbfilename) == REDIS_OK) {
3201 addReply(c,shared.ok);
3202 } else {
3203 addReply(c,shared.err);
3204 }
3205 }
3206
3207 static void bgsaveCommand(redisClient *c) {
3208 if (server.bgsaveinprogress) {
3209 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3210 return;
3211 }
3212 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
3213 addReply(c,shared.ok);
3214 } else {
3215 addReply(c,shared.err);
3216 }
3217 }
3218
3219 static void shutdownCommand(redisClient *c) {
3220 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
3221 /* Kill the saving child if there is a background saving in progress.
3222 We want to avoid race conditions, for instance our saving child may
3223 overwrite the synchronous saving did by SHUTDOWN. */
3224 if (server.bgsaveinprogress) {
3225 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3226 kill(server.bgsavechildpid,SIGKILL);
3227 rdbRemoveTempFile(server.bgsavechildpid);
3228 }
3229 /* SYNC SAVE */
3230 if (rdbSave(server.dbfilename) == REDIS_OK) {
3231 if (server.daemonize)
3232 unlink(server.pidfile);
3233 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
3234 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3235 exit(1);
3236 } else {
3237 /* Ooops.. error saving! The best we can do is to continue operating.
3238 * Note that if there was a background saving process, in the next
3239 * cron() Redis will be notified that the background saving aborted,
3240 * handling special stuff like slaves pending for synchronization... */
3241 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3242 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3243 }
3244 }
3245
3246 static void renameGenericCommand(redisClient *c, int nx) {
3247 robj *o;
3248
3249 /* To use the same key as src and dst is probably an error */
3250 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
3251 addReply(c,shared.sameobjecterr);
3252 return;
3253 }
3254
3255 o = lookupKeyWrite(c->db,c->argv[1]);
3256 if (o == NULL) {
3257 addReply(c,shared.nokeyerr);
3258 return;
3259 }
3260 incrRefCount(o);
3261 deleteIfVolatile(c->db,c->argv[2]);
3262 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
3263 if (nx) {
3264 decrRefCount(o);
3265 addReply(c,shared.czero);
3266 return;
3267 }
3268 dictReplace(c->db->dict,c->argv[2],o);
3269 } else {
3270 incrRefCount(c->argv[2]);
3271 }
3272 deleteKey(c->db,c->argv[1]);
3273 server.dirty++;
3274 addReply(c,nx ? shared.cone : shared.ok);
3275 }
3276
3277 static void renameCommand(redisClient *c) {
3278 renameGenericCommand(c,0);
3279 }
3280
3281 static void renamenxCommand(redisClient *c) {
3282 renameGenericCommand(c,1);
3283 }
3284
3285 static void moveCommand(redisClient *c) {
3286 robj *o;
3287 redisDb *src, *dst;
3288 int srcid;
3289
3290 /* Obtain source and target DB pointers */
3291 src = c->db;
3292 srcid = c->db->id;
3293 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
3294 addReply(c,shared.outofrangeerr);
3295 return;
3296 }
3297 dst = c->db;
3298 selectDb(c,srcid); /* Back to the source DB */
3299
3300 /* If the user is moving using as target the same
3301 * DB as the source DB it is probably an error. */
3302 if (src == dst) {
3303 addReply(c,shared.sameobjecterr);
3304 return;
3305 }
3306
3307 /* Check if the element exists and get a reference */
3308 o = lookupKeyWrite(c->db,c->argv[1]);
3309 if (!o) {
3310 addReply(c,shared.czero);
3311 return;
3312 }
3313
3314 /* Try to add the element to the target DB */
3315 deleteIfVolatile(dst,c->argv[1]);
3316 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
3317 addReply(c,shared.czero);
3318 return;
3319 }
3320 incrRefCount(c->argv[1]);
3321 incrRefCount(o);
3322
3323 /* OK! key moved, free the entry in the source DB */
3324 deleteKey(src,c->argv[1]);
3325 server.dirty++;
3326 addReply(c,shared.cone);
3327 }
3328
3329 /* =================================== Lists ================================ */
3330 static void pushGenericCommand(redisClient *c, int where) {
3331 robj *lobj;
3332 list *list;
3333
3334 lobj = lookupKeyWrite(c->db,c->argv[1]);
3335 if (lobj == NULL) {
3336 lobj = createListObject();
3337 list = lobj->ptr;
3338 if (where == REDIS_HEAD) {
3339 listAddNodeHead(list,c->argv[2]);
3340 } else {
3341 listAddNodeTail(list,c->argv[2]);
3342 }
3343 dictAdd(c->db->dict,c->argv[1],lobj);
3344 incrRefCount(c->argv[1]);
3345 incrRefCount(c->argv[2]);
3346 } else {
3347 if (lobj->type != REDIS_LIST) {
3348 addReply(c,shared.wrongtypeerr);
3349 return;
3350 }
3351 list = lobj->ptr;
3352 if (where == REDIS_HEAD) {
3353 listAddNodeHead(list,c->argv[2]);
3354 } else {
3355 listAddNodeTail(list,c->argv[2]);
3356 }
3357 incrRefCount(c->argv[2]);
3358 }
3359 server.dirty++;
3360 addReply(c,shared.ok);
3361 }
3362
3363 static void lpushCommand(redisClient *c) {
3364 pushGenericCommand(c,REDIS_HEAD);
3365 }
3366
3367 static void rpushCommand(redisClient *c) {
3368 pushGenericCommand(c,REDIS_TAIL);
3369 }
3370
3371 static void llenCommand(redisClient *c) {
3372 robj *o;
3373 list *l;
3374
3375 o = lookupKeyRead(c->db,c->argv[1]);
3376 if (o == NULL) {
3377 addReply(c,shared.czero);
3378 return;
3379 } else {
3380 if (o->type != REDIS_LIST) {
3381 addReply(c,shared.wrongtypeerr);
3382 } else {
3383 l = o->ptr;
3384 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
3385 }
3386 }
3387 }
3388
3389 static void lindexCommand(redisClient *c) {
3390 robj *o;
3391 int index = atoi(c->argv[2]->ptr);
3392
3393 o = lookupKeyRead(c->db,c->argv[1]);
3394 if (o == NULL) {
3395 addReply(c,shared.nullbulk);
3396 } else {
3397 if (o->type != REDIS_LIST) {
3398 addReply(c,shared.wrongtypeerr);
3399 } else {
3400 list *list = o->ptr;
3401 listNode *ln;
3402
3403 ln = listIndex(list, index);
3404 if (ln == NULL) {
3405 addReply(c,shared.nullbulk);
3406 } else {
3407 robj *ele = listNodeValue(ln);
3408 addReplyBulkLen(c,ele);
3409 addReply(c,ele);
3410 addReply(c,shared.crlf);
3411 }
3412 }
3413 }
3414 }
3415
3416 static void lsetCommand(redisClient *c) {
3417 robj *o;
3418 int index = atoi(c->argv[2]->ptr);
3419
3420 o = lookupKeyWrite(c->db,c->argv[1]);
3421 if (o == NULL) {
3422 addReply(c,shared.nokeyerr);
3423 } else {
3424 if (o->type != REDIS_LIST) {
3425 addReply(c,shared.wrongtypeerr);
3426 } else {
3427 list *list = o->ptr;
3428 listNode *ln;
3429
3430 ln = listIndex(list, index);
3431 if (ln == NULL) {
3432 addReply(c,shared.outofrangeerr);
3433 } else {
3434 robj *ele = listNodeValue(ln);
3435
3436 decrRefCount(ele);
3437 listNodeValue(ln) = c->argv[3];
3438 incrRefCount(c->argv[3]);
3439 addReply(c,shared.ok);
3440 server.dirty++;
3441 }
3442 }
3443 }
3444 }
3445
3446 static void popGenericCommand(redisClient *c, int where) {
3447 robj *o;
3448
3449 o = lookupKeyWrite(c->db,c->argv[1]);
3450 if (o == NULL) {
3451 addReply(c,shared.nullbulk);
3452 } else {
3453 if (o->type != REDIS_LIST) {
3454 addReply(c,shared.wrongtypeerr);
3455 } else {
3456 list *list = o->ptr;
3457 listNode *ln;
3458
3459 if (where == REDIS_HEAD)
3460 ln = listFirst(list);
3461 else
3462 ln = listLast(list);
3463
3464 if (ln == NULL) {
3465 addReply(c,shared.nullbulk);
3466 } else {
3467 robj *ele = listNodeValue(ln);
3468 addReplyBulkLen(c,ele);
3469 addReply(c,ele);
3470 addReply(c,shared.crlf);
3471 listDelNode(list,ln);
3472 server.dirty++;
3473 }
3474 }
3475 }
3476 }
3477
3478 static void lpopCommand(redisClient *c) {
3479 popGenericCommand(c,REDIS_HEAD);
3480 }
3481
3482 static void rpopCommand(redisClient *c) {
3483 popGenericCommand(c,REDIS_TAIL);
3484 }
3485
3486 static void lrangeCommand(redisClient *c) {
3487 robj *o;
3488 int start = atoi(c->argv[2]->ptr);
3489 int end = atoi(c->argv[3]->ptr);
3490
3491 o = lookupKeyRead(c->db,c->argv[1]);
3492 if (o == NULL) {
3493 addReply(c,shared.nullmultibulk);
3494 } else {
3495 if (o->type != REDIS_LIST) {
3496 addReply(c,shared.wrongtypeerr);
3497 } else {
3498 list *list = o->ptr;
3499 listNode *ln;
3500 int llen = listLength(list);
3501 int rangelen, j;
3502 robj *ele;
3503
3504 /* convert negative indexes */
3505 if (start < 0) start = llen+start;
3506 if (end < 0) end = llen+end;
3507 if (start < 0) start = 0;
3508 if (end < 0) end = 0;
3509
3510 /* indexes sanity checks */
3511 if (start > end || start >= llen) {
3512 /* Out of range start or start > end result in empty list */
3513 addReply(c,shared.emptymultibulk);
3514 return;
3515 }
3516 if (end >= llen) end = llen-1;
3517 rangelen = (end-start)+1;
3518
3519 /* Return the result in form of a multi-bulk reply */
3520 ln = listIndex(list, start);
3521 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
3522 for (j = 0; j < rangelen; j++) {
3523 ele = listNodeValue(ln);
3524 addReplyBulkLen(c,ele);
3525 addReply(c,ele);
3526 addReply(c,shared.crlf);
3527 ln = ln->next;
3528 }
3529 }
3530 }
3531 }
3532
3533 static void ltrimCommand(redisClient *c) {
3534 robj *o;
3535 int start = atoi(c->argv[2]->ptr);
3536 int end = atoi(c->argv[3]->ptr);
3537
3538 o = lookupKeyWrite(c->db,c->argv[1]);
3539 if (o == NULL) {
3540 addReply(c,shared.nokeyerr);
3541 } else {
3542 if (o->type != REDIS_LIST) {
3543 addReply(c,shared.wrongtypeerr);
3544 } else {
3545 list *list = o->ptr;
3546 listNode *ln;
3547 int llen = listLength(list);
3548 int j, ltrim, rtrim;
3549
3550 /* convert negative indexes */
3551 if (start < 0) start = llen+start;
3552 if (end < 0) end = llen+end;
3553 if (start < 0) start = 0;
3554 if (end < 0) end = 0;
3555
3556 /* indexes sanity checks */
3557 if (start > end || start >= llen) {
3558 /* Out of range start or start > end result in empty list */
3559 ltrim = llen;
3560 rtrim = 0;
3561 } else {
3562 if (end >= llen) end = llen-1;
3563 ltrim = start;
3564 rtrim = llen-end-1;
3565 }
3566
3567 /* Remove list elements to perform the trim */
3568 for (j = 0; j < ltrim; j++) {
3569 ln = listFirst(list);
3570 listDelNode(list,ln);
3571 }
3572 for (j = 0; j < rtrim; j++) {
3573 ln = listLast(list);
3574 listDelNode(list,ln);
3575 }
3576 server.dirty++;
3577 addReply(c,shared.ok);
3578 }
3579 }
3580 }
3581
3582 static void lremCommand(redisClient *c) {
3583 robj *o;
3584
3585 o = lookupKeyWrite(c->db,c->argv[1]);
3586 if (o == NULL) {
3587 addReply(c,shared.czero);
3588 } else {
3589 if (o->type != REDIS_LIST) {
3590 addReply(c,shared.wrongtypeerr);
3591 } else {
3592 list *list = o->ptr;
3593 listNode *ln, *next;
3594 int toremove = atoi(c->argv[2]->ptr);
3595 int removed = 0;
3596 int fromtail = 0;
3597
3598 if (toremove < 0) {
3599 toremove = -toremove;
3600 fromtail = 1;
3601 }
3602 ln = fromtail ? list->tail : list->head;
3603 while (ln) {
3604 robj *ele = listNodeValue(ln);
3605
3606 next = fromtail ? ln->prev : ln->next;
3607 if (compareStringObjects(ele,c->argv[3]) == 0) {
3608 listDelNode(list,ln);
3609 server.dirty++;
3610 removed++;
3611 if (toremove && removed == toremove) break;
3612 }
3613 ln = next;
3614 }
3615 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
3616 }
3617 }
3618 }
3619
3620 /* This is the semantic of this command:
3621 * RPOPLPUSH srclist dstlist:
3622 * IF LLEN(srclist) > 0
3623 * element = RPOP srclist
3624 * LPUSH dstlist element
3625 * RETURN element
3626 * ELSE
3627 * RETURN nil
3628 * END
3629 * END
3630 *
3631 * The idea is to be able to get an element from a list in a reliable way
3632 * since the element is not just returned but pushed against another list
3633 * as well. This command was originally proposed by Ezra Zygmuntowicz.
3634 */
3635 static void rpoplpushcommand(redisClient *c) {
3636 robj *sobj;
3637
3638 sobj = lookupKeyWrite(c->db,c->argv[1]);
3639 if (sobj == NULL) {
3640 addReply(c,shared.nullbulk);
3641 } else {
3642 if (sobj->type != REDIS_LIST) {
3643 addReply(c,shared.wrongtypeerr);
3644 } else {
3645 list *srclist = sobj->ptr;
3646 listNode *ln = listLast(srclist);
3647
3648 if (ln == NULL) {
3649 addReply(c,shared.nullbulk);
3650 } else {
3651 robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
3652 robj *ele = listNodeValue(ln);
3653 list *dstlist;
3654
3655 if (dobj == NULL) {
3656
3657 /* Create the list if the key does not exist */
3658 dobj = createListObject();
3659 dictAdd(c->db->dict,c->argv[2],dobj);
3660 incrRefCount(c->argv[2]);
3661 } else if (dobj->type != REDIS_LIST) {
3662 addReply(c,shared.wrongtypeerr);
3663 return;
3664 }
3665 /* Add the element to the target list */
3666 dstlist = dobj->ptr;
3667 listAddNodeHead(dstlist,ele);
3668 incrRefCount(ele);
3669
3670 /* Send the element to the client as reply as well */
3671 addReplyBulkLen(c,ele);
3672 addReply(c,ele);
3673 addReply(c,shared.crlf);
3674
3675 /* Finally remove the element from the source list */
3676 listDelNode(srclist,ln);
3677 server.dirty++;
3678 }
3679 }
3680 }
3681 }
3682
3683
3684 /* ==================================== Sets ================================ */
3685
3686 static void saddCommand(redisClient *c) {
3687 robj *set;
3688
3689 set = lookupKeyWrite(c->db,c->argv[1]);
3690 if (set == NULL) {
3691 set = createSetObject();
3692 dictAdd(c->db->dict,c->argv[1],set);
3693 incrRefCount(c->argv[1]);
3694 } else {
3695 if (set->type != REDIS_SET) {
3696 addReply(c,shared.wrongtypeerr);
3697 return;
3698 }
3699 }
3700 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
3701 incrRefCount(c->argv[2]);
3702 server.dirty++;
3703 addReply(c,shared.cone);
3704 } else {
3705 addReply(c,shared.czero);
3706 }
3707 }
3708
3709 static void sremCommand(redisClient *c) {
3710 robj *set;
3711
3712 set = lookupKeyWrite(c->db,c->argv[1]);
3713 if (set == NULL) {
3714 addReply(c,shared.czero);
3715 } else {
3716 if (set->type != REDIS_SET) {
3717 addReply(c,shared.wrongtypeerr);
3718 return;
3719 }
3720 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
3721 server.dirty++;
3722 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3723 addReply(c,shared.cone);
3724 } else {
3725 addReply(c,shared.czero);
3726 }
3727 }
3728 }
3729
3730 static void smoveCommand(redisClient *c) {
3731 robj *srcset, *dstset;
3732
3733 srcset = lookupKeyWrite(c->db,c->argv[1]);
3734 dstset = lookupKeyWrite(c->db,c->argv[2]);
3735
3736 /* If the source key does not exist return 0, if it's of the wrong type
3737 * raise an error */
3738 if (srcset == NULL || srcset->type != REDIS_SET) {
3739 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
3740 return;
3741 }
3742 /* Error if the destination key is not a set as well */
3743 if (dstset && dstset->type != REDIS_SET) {
3744 addReply(c,shared.wrongtypeerr);
3745 return;
3746 }
3747 /* Remove the element from the source set */
3748 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
3749 /* Key not found in the src set! return zero */
3750 addReply(c,shared.czero);
3751 return;
3752 }
3753 server.dirty++;
3754 /* Add the element to the destination set */
3755 if (!dstset) {
3756 dstset = createSetObject();
3757 dictAdd(c->db->dict,c->argv[2],dstset);
3758 incrRefCount(c->argv[2]);
3759 }
3760 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
3761 incrRefCount(c->argv[3]);
3762 addReply(c,shared.cone);
3763 }
3764
3765 static void sismemberCommand(redisClient *c) {
3766 robj *set;
3767
3768 set = lookupKeyRead(c->db,c->argv[1]);
3769 if (set == NULL) {
3770 addReply(c,shared.czero);
3771 } else {
3772 if (set->type != REDIS_SET) {
3773 addReply(c,shared.wrongtypeerr);
3774 return;
3775 }
3776 if (dictFind(set->ptr,c->argv[2]))
3777 addReply(c,shared.cone);
3778 else
3779 addReply(c,shared.czero);
3780 }
3781 }
3782
3783 static void scardCommand(redisClient *c) {
3784 robj *o;
3785 dict *s;
3786
3787 o = lookupKeyRead(c->db,c->argv[1]);
3788 if (o == NULL) {
3789 addReply(c,shared.czero);
3790 return;
3791 } else {
3792 if (o->type != REDIS_SET) {
3793 addReply(c,shared.wrongtypeerr);
3794 } else {
3795 s = o->ptr;
3796 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3797 dictSize(s)));
3798 }
3799 }
3800 }
3801
3802 static void spopCommand(redisClient *c) {
3803 robj *set;
3804 dictEntry *de;
3805
3806 set = lookupKeyWrite(c->db,c->argv[1]);
3807 if (set == NULL) {
3808 addReply(c,shared.nullbulk);
3809 } else {
3810 if (set->type != REDIS_SET) {
3811 addReply(c,shared.wrongtypeerr);
3812 return;
3813 }
3814 de = dictGetRandomKey(set->ptr);
3815 if (de == NULL) {
3816 addReply(c,shared.nullbulk);
3817 } else {
3818 robj *ele = dictGetEntryKey(de);
3819
3820 addReplyBulkLen(c,ele);
3821 addReply(c,ele);
3822 addReply(c,shared.crlf);
3823 dictDelete(set->ptr,ele);
3824 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3825 server.dirty++;
3826 }
3827 }
3828 }
3829
3830 static void srandmemberCommand(redisClient *c) {
3831 robj *set;
3832 dictEntry *de;
3833
3834 set = lookupKeyRead(c->db,c->argv[1]);
3835 if (set == NULL) {
3836 addReply(c,shared.nullbulk);
3837 } else {
3838 if (set->type != REDIS_SET) {
3839 addReply(c,shared.wrongtypeerr);
3840 return;
3841 }
3842 de = dictGetRandomKey(set->ptr);
3843 if (de == NULL) {
3844 addReply(c,shared.nullbulk);
3845 } else {
3846 robj *ele = dictGetEntryKey(de);
3847
3848 addReplyBulkLen(c,ele);
3849 addReply(c,ele);
3850 addReply(c,shared.crlf);
3851 }
3852 }
3853 }
3854
3855 static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
3856 dict **d1 = (void*) s1, **d2 = (void*) s2;
3857
3858 return dictSize(*d1)-dictSize(*d2);
3859 }
3860
3861 static void sinterGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey) {
3862 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3863 dictIterator *di;
3864 dictEntry *de;
3865 robj *lenobj = NULL, *dstset = NULL;
3866 int j, cardinality = 0;
3867
3868 for (j = 0; j < setsnum; j++) {
3869 robj *setobj;
3870
3871 setobj = dstkey ?
3872 lookupKeyWrite(c->db,setskeys[j]) :
3873 lookupKeyRead(c->db,setskeys[j]);
3874 if (!setobj) {
3875 zfree(dv);
3876 if (dstkey) {
3877 deleteKey(c->db,dstkey);
3878 addReply(c,shared.ok);
3879 } else {
3880 addReply(c,shared.nullmultibulk);
3881 }
3882 return;
3883 }
3884 if (setobj->type != REDIS_SET) {
3885 zfree(dv);
3886 addReply(c,shared.wrongtypeerr);
3887 return;
3888 }
3889 dv[j] = setobj->ptr;
3890 }
3891 /* Sort sets from the smallest to largest, this will improve our
3892 * algorithm's performace */
3893 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3894
3895 /* The first thing we should output is the total number of elements...
3896 * since this is a multi-bulk write, but at this stage we don't know
3897 * the intersection set size, so we use a trick, append an empty object
3898 * to the output list and save the pointer to later modify it with the
3899 * right length */
3900 if (!dstkey) {
3901 lenobj = createObject(REDIS_STRING,NULL);
3902 addReply(c,lenobj);
3903 decrRefCount(lenobj);
3904 } else {
3905 /* If we have a target key where to store the resulting set
3906 * create this key with an empty set inside */
3907 dstset = createSetObject();
3908 }
3909
3910 /* Iterate all the elements of the first (smallest) set, and test
3911 * the element against all the other sets, if at least one set does
3912 * not include the element it is discarded */
3913 di = dictGetIterator(dv[0]);
3914
3915 while((de = dictNext(di)) != NULL) {
3916 robj *ele;
3917
3918 for (j = 1; j < setsnum; j++)
3919 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
3920 if (j != setsnum)
3921 continue; /* at least one set does not contain the member */
3922 ele = dictGetEntryKey(de);
3923 if (!dstkey) {
3924 addReplyBulkLen(c,ele);
3925 addReply(c,ele);
3926 addReply(c,shared.crlf);
3927 cardinality++;
3928 } else {
3929 dictAdd(dstset->ptr,ele,NULL);
3930 incrRefCount(ele);
3931 }
3932 }
3933 dictReleaseIterator(di);
3934
3935 if (dstkey) {
3936 /* Store the resulting set into the target */
3937 deleteKey(c->db,dstkey);
3938 dictAdd(c->db->dict,dstkey,dstset);
3939 incrRefCount(dstkey);
3940 }
3941
3942 if (!dstkey) {
3943 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",cardinality);
3944 } else {
3945 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3946 dictSize((dict*)dstset->ptr)));
3947 server.dirty++;
3948 }
3949 zfree(dv);
3950 }
3951
3952 static void sinterCommand(redisClient *c) {
3953 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
3954 }
3955
3956 static void sinterstoreCommand(redisClient *c) {
3957 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
3958 }
3959
3960 #define REDIS_OP_UNION 0
3961 #define REDIS_OP_DIFF 1
3962
3963 static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
3964 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3965 dictIterator *di;
3966 dictEntry *de;
3967 robj *dstset = NULL;
3968 int j, cardinality = 0;
3969
3970 for (j = 0; j < setsnum; j++) {
3971 robj *setobj;
3972
3973 setobj = dstkey ?
3974 lookupKeyWrite(c->db,setskeys[j]) :
3975 lookupKeyRead(c->db,setskeys[j]);
3976 if (!setobj) {
3977 dv[j] = NULL;
3978 continue;
3979 }
3980 if (setobj->type != REDIS_SET) {
3981 zfree(dv);
3982 addReply(c,shared.wrongtypeerr);
3983 return;
3984 }
3985 dv[j] = setobj->ptr;
3986 }
3987
3988 /* We need a temp set object to store our union. If the dstkey
3989 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
3990 * this set object will be the resulting object to set into the target key*/
3991 dstset = createSetObject();
3992
3993 /* Iterate all the elements of all the sets, add every element a single
3994 * time to the result set */
3995 for (j = 0; j < setsnum; j++) {
3996 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
3997 if (!dv[j]) continue; /* non existing keys are like empty sets */
3998
3999 di = dictGetIterator(dv[j]);
4000
4001 while((de = dictNext(di)) != NULL) {
4002 robj *ele;
4003
4004 /* dictAdd will not add the same element multiple times */
4005 ele = dictGetEntryKey(de);
4006 if (op == REDIS_OP_UNION || j == 0) {
4007 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
4008 incrRefCount(ele);
4009 cardinality++;
4010 }
4011 } else if (op == REDIS_OP_DIFF) {
4012 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
4013 cardinality--;
4014 }
4015 }
4016 }
4017 dictReleaseIterator(di);
4018
4019 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
4020 }
4021
4022 /* Output the content of the resulting set, if not in STORE mode */
4023 if (!dstkey) {
4024 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
4025 di = dictGetIterator(dstset->ptr);
4026 while((de = dictNext(di)) != NULL) {
4027 robj *ele;
4028
4029 ele = dictGetEntryKey(de);
4030 addReplyBulkLen(c,ele);
4031 addReply(c,ele);
4032 addReply(c,shared.crlf);
4033 }
4034 dictReleaseIterator(di);
4035 } else {
4036 /* If we have a target key where to store the resulting set
4037 * create this key with the result set inside */
4038 deleteKey(c->db,dstkey);
4039 dictAdd(c->db->dict,dstkey,dstset);
4040 incrRefCount(dstkey);
4041 }
4042
4043 /* Cleanup */
4044 if (!dstkey) {
4045 decrRefCount(dstset);
4046 } else {
4047 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
4048 dictSize((dict*)dstset->ptr)));
4049 server.dirty++;
4050 }
4051 zfree(dv);
4052 }
4053
4054 static void sunionCommand(redisClient *c) {
4055 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
4056 }
4057
4058 static void sunionstoreCommand(redisClient *c) {
4059 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
4060 }
4061
4062 static void sdiffCommand(redisClient *c) {
4063 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
4064 }
4065
4066 static void sdiffstoreCommand(redisClient *c) {
4067 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
4068 }
4069
4070 /* ==================================== ZSets =============================== */
4071
4072 /* ZSETs are ordered sets using two data structures to hold the same elements
4073 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4074 * data structure.
4075 *
4076 * The elements are added to a hash table mapping Redis objects to scores.
4077 * At the same time the elements are added to a skip list mapping scores
4078 * to Redis objects (so objects are sorted by scores in this "view"). */
4079
4080 /* This skiplist implementation is almost a C translation of the original
4081 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4082 * Alternative to Balanced Trees", modified in three ways:
4083 * a) this implementation allows for repeated values.
4084 * b) the comparison is not just by key (our 'score') but by satellite data.
4085 * c) there is a back pointer, so it's a doubly linked list with the back
4086 * pointers being only at "level 1". This allows to traverse the list
4087 * from tail to head, useful for ZREVRANGE. */
4088
4089 static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
4090 zskiplistNode *zn = zmalloc(sizeof(*zn));
4091
4092 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
4093 zn->score = score;
4094 zn->obj = obj;
4095 return zn;
4096 }
4097
4098 static zskiplist *zslCreate(void) {
4099 int j;
4100 zskiplist *zsl;
4101
4102 zsl = zmalloc(sizeof(*zsl));
4103 zsl->level = 1;
4104 zsl->length = 0;
4105 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
4106 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
4107 zsl->header->forward[j] = NULL;
4108 zsl->header->backward = NULL;
4109 zsl->tail = NULL;
4110 return zsl;
4111 }
4112
4113 static void zslFreeNode(zskiplistNode *node) {
4114 decrRefCount(node->obj);
4115 zfree(node->forward);
4116 zfree(node);
4117 }
4118
4119 static void zslFree(zskiplist *zsl) {
4120 zskiplistNode *node = zsl->header->forward[0], *next;
4121
4122 zfree(zsl->header->forward);
4123 zfree(zsl->header);
4124 while(node) {
4125 next = node->forward[0];
4126 zslFreeNode(node);
4127 node = next;
4128 }
4129 zfree(zsl);
4130 }
4131
4132 static int zslRandomLevel(void) {
4133 int level = 1;
4134 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
4135 level += 1;
4136 return level;
4137 }
4138
4139 static void zslInsert(zskiplist *zsl, double score, robj *obj) {
4140 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4141 int i, level;
4142
4143 x = zsl->header;
4144 for (i = zsl->level-1; i >= 0; i--) {
4145 while (x->forward[i] &&
4146 (x->forward[i]->score < score ||
4147 (x->forward[i]->score == score &&
4148 compareStringObjects(x->forward[i]->obj,obj) < 0)))
4149 x = x->forward[i];
4150 update[i] = x;
4151 }
4152 /* we assume the key is not already inside, since we allow duplicated
4153 * scores, and the re-insertion of score and redis object should never
4154 * happpen since the caller of zslInsert() should test in the hash table
4155 * if the element is already inside or not. */
4156 level = zslRandomLevel();
4157 if (level > zsl->level) {
4158 for (i = zsl->level; i < level; i++)
4159 update[i] = zsl->header;
4160 zsl->level = level;
4161 }
4162 x = zslCreateNode(level,score,obj);
4163 for (i = 0; i < level; i++) {
4164 x->forward[i] = update[i]->forward[i];
4165 update[i]->forward[i] = x;
4166 }
4167 x->backward = (update[0] == zsl->header) ? NULL : update[0];
4168 if (x->forward[0])
4169 x->forward[0]->backward = x;
4170 else
4171 zsl->tail = x;
4172 zsl->length++;
4173 }
4174
4175 /* Delete an element with matching score/object from the skiplist. */
4176 static int zslDelete(zskiplist *zsl, double score, robj *obj) {
4177 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4178 int i;
4179
4180 x = zsl->header;
4181 for (i = zsl->level-1; i >= 0; i--) {
4182 while (x->forward[i] &&
4183 (x->forward[i]->score < score ||
4184 (x->forward[i]->score == score &&
4185 compareStringObjects(x->forward[i]->obj,obj) < 0)))
4186 x = x->forward[i];
4187 update[i] = x;
4188 }
4189 /* We may have multiple elements with the same score, what we need
4190 * is to find the element with both the right score and object. */
4191 x = x->forward[0];
4192 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
4193 for (i = 0; i < zsl->level; i++) {
4194 if (update[i]->forward[i] != x) break;
4195 update[i]->forward[i] = x->forward[i];
4196 }
4197 if (x->forward[0]) {
4198 x->forward[0]->backward = (x->backward == zsl->header) ?
4199 NULL : x->backward;
4200 } else {
4201 zsl->tail = x->backward;
4202 }
4203 zslFreeNode(x);
4204 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4205 zsl->level--;
4206 zsl->length--;
4207 return 1;
4208 } else {
4209 return 0; /* not found */
4210 }
4211 return 0; /* not found */
4212 }
4213
4214 /* Delete all the elements with score between min and max from the skiplist.
4215 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4216 * Note that this function takes the reference to the hash table view of the
4217 * sorted set, in order to remove the elements from the hash table too. */
4218 static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4219 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4220 unsigned long removed = 0;
4221 int i;
4222
4223 x = zsl->header;
4224 for (i = zsl->level-1; i >= 0; i--) {
4225 while (x->forward[i] && x->forward[i]->score < min)
4226 x = x->forward[i];
4227 update[i] = x;
4228 }
4229 /* We may have multiple elements with the same score, what we need
4230 * is to find the element with both the right score and object. */
4231 x = x->forward[0];
4232 while (x && x->score <= max) {
4233 zskiplistNode *next;
4234
4235 for (i = 0; i < zsl->level; i++) {
4236 if (update[i]->forward[i] != x) break;
4237 update[i]->forward[i] = x->forward[i];
4238 }
4239 if (x->forward[0]) {
4240 x->forward[0]->backward = (x->backward == zsl->header) ?
4241 NULL : x->backward;
4242 } else {
4243 zsl->tail = x->backward;
4244 }
4245 next = x->forward[0];
4246 dictDelete(dict,x->obj);
4247 zslFreeNode(x);
4248 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4249 zsl->level--;
4250 zsl->length--;
4251 removed++;
4252 x = next;
4253 }
4254 return removed; /* not found */
4255 }
4256
4257 /* Find the first node having a score equal or greater than the specified one.
4258 * Returns NULL if there is no match. */
4259 static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4260 zskiplistNode *x;
4261 int i;
4262
4263 x = zsl->header;
4264 for (i = zsl->level-1; i >= 0; i--) {
4265 while (x->forward[i] && x->forward[i]->score < score)
4266 x = x->forward[i];
4267 }
4268 /* We may have multiple elements with the same score, what we need
4269 * is to find the element with both the right score and object. */
4270 return x->forward[0];
4271 }
4272
4273 /* The actual Z-commands implementations */
4274
4275 /* This generic command implements both ZADD and ZINCRBY.
4276 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
4277 * the increment if the operation is a ZINCRBY (doincrement == 1). */
4278 static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
4279 robj *zsetobj;
4280 zset *zs;
4281 double *score;
4282
4283 zsetobj = lookupKeyWrite(c->db,key);
4284 if (zsetobj == NULL) {
4285 zsetobj = createZsetObject();
4286 dictAdd(c->db->dict,key,zsetobj);
4287 incrRefCount(key);
4288 } else {
4289 if (zsetobj->type != REDIS_ZSET) {
4290 addReply(c,shared.wrongtypeerr);
4291 return;
4292 }
4293 }
4294 zs = zsetobj->ptr;
4295
4296 /* Ok now since we implement both ZADD and ZINCRBY here the code
4297 * needs to handle the two different conditions. It's all about setting
4298 * '*score', that is, the new score to set, to the right value. */
4299 score = zmalloc(sizeof(double));
4300 if (doincrement) {
4301 dictEntry *de;
4302
4303 /* Read the old score. If the element was not present starts from 0 */
4304 de = dictFind(zs->dict,ele);
4305 if (de) {
4306 double *oldscore = dictGetEntryVal(de);
4307 *score = *oldscore + scoreval;
4308 } else {
4309 *score = scoreval;
4310 }
4311 } else {
4312 *score = scoreval;
4313 }
4314
4315 /* What follows is a simple remove and re-insert operation that is common
4316 * to both ZADD and ZINCRBY... */
4317 if (dictAdd(zs->dict,ele,score) == DICT_OK) {
4318 /* case 1: New element */
4319 incrRefCount(ele); /* added to hash */
4320 zslInsert(zs->zsl,*score,ele);
4321 incrRefCount(ele); /* added to skiplist */
4322 server.dirty++;
4323 if (doincrement)
4324 addReplyDouble(c,*score);
4325 else
4326 addReply(c,shared.cone);
4327 } else {
4328 dictEntry *de;
4329 double *oldscore;
4330
4331 /* case 2: Score update operation */
4332 de = dictFind(zs->dict,ele);
4333 assert(de != NULL);
4334 oldscore = dictGetEntryVal(de);
4335 if (*score != *oldscore) {
4336 int deleted;
4337
4338 /* Remove and insert the element in the skip list with new score */
4339 deleted = zslDelete(zs->zsl,*oldscore,ele);
4340 assert(deleted != 0);
4341 zslInsert(zs->zsl,*score,ele);
4342 incrRefCount(ele);
4343 /* Update the score in the hash table */
4344 dictReplace(zs->dict,ele,score);
4345 server.dirty++;
4346 } else {
4347 zfree(score);
4348 }
4349 if (doincrement)
4350 addReplyDouble(c,*score);
4351 else
4352 addReply(c,shared.czero);
4353 }
4354 }
4355
4356 static void zaddCommand(redisClient *c) {
4357 double scoreval;
4358
4359 scoreval = strtod(c->argv[2]->ptr,NULL);
4360 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
4361 }
4362
4363 static void zincrbyCommand(redisClient *c) {
4364 double scoreval;
4365
4366 scoreval = strtod(c->argv[2]->ptr,NULL);
4367 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
4368 }
4369
4370 static void zremCommand(redisClient *c) {
4371 robj *zsetobj;
4372 zset *zs;
4373
4374 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4375 if (zsetobj == NULL) {
4376 addReply(c,shared.czero);
4377 } else {
4378 dictEntry *de;
4379 double *oldscore;
4380 int deleted;
4381
4382 if (zsetobj->type != REDIS_ZSET) {
4383 addReply(c,shared.wrongtypeerr);
4384 return;
4385 }
4386 zs = zsetobj->ptr;
4387 de = dictFind(zs->dict,c->argv[2]);
4388 if (de == NULL) {
4389 addReply(c,shared.czero);
4390 return;
4391 }
4392 /* Delete from the skiplist */
4393 oldscore = dictGetEntryVal(de);
4394 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
4395 assert(deleted != 0);
4396
4397 /* Delete from the hash table */
4398 dictDelete(zs->dict,c->argv[2]);
4399 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4400 server.dirty++;
4401 addReply(c,shared.cone);
4402 }
4403 }
4404
4405 static void zremrangebyscoreCommand(redisClient *c) {
4406 double min = strtod(c->argv[2]->ptr,NULL);
4407 double max = strtod(c->argv[3]->ptr,NULL);
4408 robj *zsetobj;
4409 zset *zs;
4410
4411 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4412 if (zsetobj == NULL) {
4413 addReply(c,shared.czero);
4414 } else {
4415 long deleted;
4416
4417 if (zsetobj->type != REDIS_ZSET) {
4418 addReply(c,shared.wrongtypeerr);
4419 return;
4420 }
4421 zs = zsetobj->ptr;
4422 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4423 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4424 server.dirty += deleted;
4425 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4426 }
4427 }
4428
4429 static void zrangeGenericCommand(redisClient *c, int reverse) {
4430 robj *o;
4431 int start = atoi(c->argv[2]->ptr);
4432 int end = atoi(c->argv[3]->ptr);
4433
4434 o = lookupKeyRead(c->db,c->argv[1]);
4435 if (o == NULL) {
4436 addReply(c,shared.nullmultibulk);
4437 } else {
4438 if (o->type != REDIS_ZSET) {
4439 addReply(c,shared.wrongtypeerr);
4440 } else {
4441 zset *zsetobj = o->ptr;
4442 zskiplist *zsl = zsetobj->zsl;
4443 zskiplistNode *ln;
4444
4445 int llen = zsl->length;
4446 int rangelen, j;
4447 robj *ele;
4448
4449 /* convert negative indexes */
4450 if (start < 0) start = llen+start;
4451 if (end < 0) end = llen+end;
4452 if (start < 0) start = 0;
4453 if (end < 0) end = 0;
4454
4455 /* indexes sanity checks */
4456 if (start > end || start >= llen) {
4457 /* Out of range start or start > end result in empty list */
4458 addReply(c,shared.emptymultibulk);
4459 return;
4460 }
4461 if (end >= llen) end = llen-1;
4462 rangelen = (end-start)+1;
4463
4464 /* Return the result in form of a multi-bulk reply */
4465 if (reverse) {
4466 ln = zsl->tail;
4467 while (start--)
4468 ln = ln->backward;
4469 } else {
4470 ln = zsl->header->forward[0];
4471 while (start--)
4472 ln = ln->forward[0];
4473 }
4474
4475 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
4476 for (j = 0; j < rangelen; j++) {
4477 ele = ln->obj;
4478 addReplyBulkLen(c,ele);
4479 addReply(c,ele);
4480 addReply(c,shared.crlf);
4481 ln = reverse ? ln->backward : ln->forward[0];
4482 }
4483 }
4484 }
4485 }
4486
4487 static void zrangeCommand(redisClient *c) {
4488 zrangeGenericCommand(c,0);
4489 }
4490
4491 static void zrevrangeCommand(redisClient *c) {
4492 zrangeGenericCommand(c,1);
4493 }
4494
4495 static void zrangebyscoreCommand(redisClient *c) {
4496 robj *o;
4497 double min = strtod(c->argv[2]->ptr,NULL);
4498 double max = strtod(c->argv[3]->ptr,NULL);
4499
4500 o = lookupKeyRead(c->db,c->argv[1]);
4501 if (o == NULL) {
4502 addReply(c,shared.nullmultibulk);
4503 } else {
4504 if (o->type != REDIS_ZSET) {
4505 addReply(c,shared.wrongtypeerr);
4506 } else {
4507 zset *zsetobj = o->ptr;
4508 zskiplist *zsl = zsetobj->zsl;
4509 zskiplistNode *ln;
4510 robj *ele, *lenobj;
4511 unsigned int rangelen = 0;
4512
4513 /* Get the first node with the score >= min */
4514 ln = zslFirstWithScore(zsl,min);
4515 if (ln == NULL) {
4516 /* No element matching the speciifed interval */
4517 addReply(c,shared.emptymultibulk);
4518 return;
4519 }
4520
4521 /* We don't know in advance how many matching elements there
4522 * are in the list, so we push this object that will represent
4523 * the multi-bulk length in the output buffer, and will "fix"
4524 * it later */
4525 lenobj = createObject(REDIS_STRING,NULL);
4526 addReply(c,lenobj);
4527
4528 while(ln && ln->score <= max) {
4529 ele = ln->obj;
4530 addReplyBulkLen(c,ele);
4531 addReply(c,ele);
4532 addReply(c,shared.crlf);
4533 ln = ln->forward[0];
4534 rangelen++;
4535 }
4536 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4537 }
4538 }
4539 }
4540
4541 static void zcardCommand(redisClient *c) {
4542 robj *o;
4543 zset *zs;
4544
4545 o = lookupKeyRead(c->db,c->argv[1]);
4546 if (o == NULL) {
4547 addReply(c,shared.czero);
4548 return;
4549 } else {
4550 if (o->type != REDIS_ZSET) {
4551 addReply(c,shared.wrongtypeerr);
4552 } else {
4553 zs = o->ptr;
4554 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",zs->zsl->length));
4555 }
4556 }
4557 }
4558
4559 static void zscoreCommand(redisClient *c) {
4560 robj *o;
4561 zset *zs;
4562
4563 o = lookupKeyRead(c->db,c->argv[1]);
4564 if (o == NULL) {
4565 addReply(c,shared.nullbulk);
4566 return;
4567 } else {
4568 if (o->type != REDIS_ZSET) {
4569 addReply(c,shared.wrongtypeerr);
4570 } else {
4571 dictEntry *de;
4572
4573 zs = o->ptr;
4574 de = dictFind(zs->dict,c->argv[2]);
4575 if (!de) {
4576 addReply(c,shared.nullbulk);
4577 } else {
4578 double *score = dictGetEntryVal(de);
4579
4580 addReplyDouble(c,*score);
4581 }
4582 }
4583 }
4584 }
4585
4586 /* ========================= Non type-specific commands ==================== */
4587
4588 static void flushdbCommand(redisClient *c) {
4589 server.dirty += dictSize(c->db->dict);
4590 dictEmpty(c->db->dict);
4591 dictEmpty(c->db->expires);
4592 addReply(c,shared.ok);
4593 }
4594
4595 static void flushallCommand(redisClient *c) {
4596 server.dirty += emptyDb();
4597 addReply(c,shared.ok);
4598 rdbSave(server.dbfilename);
4599 server.dirty++;
4600 }
4601
4602 static redisSortOperation *createSortOperation(int type, robj *pattern) {
4603 redisSortOperation *so = zmalloc(sizeof(*so));
4604 so->type = type;
4605 so->pattern = pattern;
4606 return so;
4607 }
4608
4609 /* Return the value associated to the key with a name obtained
4610 * substituting the first occurence of '*' in 'pattern' with 'subst' */
4611 static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
4612 char *p;
4613 sds spat, ssub;
4614 robj keyobj;
4615 int prefixlen, sublen, postfixlen;
4616 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
4617 struct {
4618 long len;
4619 long free;
4620 char buf[REDIS_SORTKEY_MAX+1];
4621 } keyname;
4622
4623 /* If the pattern is "#" return the substitution object itself in order
4624 * to implement the "SORT ... GET #" feature. */
4625 spat = pattern->ptr;
4626 if (spat[0] == '#' && spat[1] == '\0') {
4627 return subst;
4628 }
4629
4630 /* The substitution object may be specially encoded. If so we create
4631 * a decoded object on the fly. */
4632 if (subst->encoding == REDIS_ENCODING_RAW)
4633 /* If we don't need to get a decoded object increment the refcount
4634 * so that the final decrRefCount() call will restore the original
4635 * count */
4636 incrRefCount(subst);
4637 else {
4638 subst = getDecodedObject(subst);
4639 }
4640
4641 ssub = subst->ptr;
4642 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
4643 p = strchr(spat,'*');
4644 if (!p) {
4645 decrRefCount(subst);
4646 return NULL;
4647 }
4648
4649 prefixlen = p-spat;
4650 sublen = sdslen(ssub);
4651 postfixlen = sdslen(spat)-(prefixlen+1);
4652 memcpy(keyname.buf,spat,prefixlen);
4653 memcpy(keyname.buf+prefixlen,ssub,sublen);
4654 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
4655 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
4656 keyname.len = prefixlen+sublen+postfixlen;
4657
4658 keyobj.refcount = 1;
4659 keyobj.type = REDIS_STRING;
4660 keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
4661
4662 decrRefCount(subst);
4663
4664 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
4665 return lookupKeyRead(db,&keyobj);
4666 }
4667
4668 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
4669 * the additional parameter is not standard but a BSD-specific we have to
4670 * pass sorting parameters via the global 'server' structure */
4671 static int sortCompare(const void *s1, const void *s2) {
4672 const redisSortObject *so1 = s1, *so2 = s2;
4673 int cmp;
4674
4675 if (!server.sort_alpha) {
4676 /* Numeric sorting. Here it's trivial as we precomputed scores */
4677 if (so1->u.score > so2->u.score) {
4678 cmp = 1;
4679 } else if (so1->u.score < so2->u.score) {
4680 cmp = -1;
4681 } else {
4682 cmp = 0;
4683 }
4684 } else {
4685 /* Alphanumeric sorting */
4686 if (server.sort_bypattern) {
4687 if (!so1->u.cmpobj || !so2->u.cmpobj) {
4688 /* At least one compare object is NULL */
4689 if (so1->u.cmpobj == so2->u.cmpobj)
4690 cmp = 0;
4691 else if (so1->u.cmpobj == NULL)
4692 cmp = -1;
4693 else
4694 cmp = 1;
4695 } else {
4696 /* We have both the objects, use strcoll */
4697 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
4698 }
4699 } else {
4700 /* Compare elements directly */
4701 if (so1->obj->encoding == REDIS_ENCODING_RAW &&
4702 so2->obj->encoding == REDIS_ENCODING_RAW) {
4703 cmp = strcoll(so1->obj->ptr,so2->obj->ptr);
4704 } else {
4705 robj *dec1, *dec2;
4706
4707 dec1 = so1->obj->encoding == REDIS_ENCODING_RAW ?
4708 so1->obj : getDecodedObject(so1->obj);
4709 dec2 = so2->obj->encoding == REDIS_ENCODING_RAW ?
4710 so2->obj : getDecodedObject(so2->obj);
4711 cmp = strcoll(dec1->ptr,dec2->ptr);
4712 if (dec1 != so1->obj) decrRefCount(dec1);
4713 if (dec2 != so2->obj) decrRefCount(dec2);
4714 }
4715 }
4716 }
4717 return server.sort_desc ? -cmp : cmp;
4718 }
4719
4720 /* The SORT command is the most complex command in Redis. Warning: this code
4721 * is optimized for speed and a bit less for readability */
4722 static void sortCommand(redisClient *c) {
4723 list *operations;
4724 int outputlen = 0;
4725 int desc = 0, alpha = 0;
4726 int limit_start = 0, limit_count = -1, start, end;
4727 int j, dontsort = 0, vectorlen;
4728 int getop = 0; /* GET operation counter */
4729 robj *sortval, *sortby = NULL, *storekey = NULL;
4730 redisSortObject *vector; /* Resulting vector to sort */
4731
4732 /* Lookup the key to sort. It must be of the right types */
4733 sortval = lookupKeyRead(c->db,c->argv[1]);
4734 if (sortval == NULL) {
4735 addReply(c,shared.nokeyerr);
4736 return;
4737 }
4738 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
4739 addReply(c,shared.wrongtypeerr);
4740 return;
4741 }
4742
4743 /* Create a list of operations to perform for every sorted element.
4744 * Operations can be GET/DEL/INCR/DECR */
4745 operations = listCreate();
4746 listSetFreeMethod(operations,zfree);
4747 j = 2;
4748
4749 /* Now we need to protect sortval incrementing its count, in the future
4750 * SORT may have options able to overwrite/delete keys during the sorting
4751 * and the sorted key itself may get destroied */
4752 incrRefCount(sortval);
4753
4754 /* The SORT command has an SQL-alike syntax, parse it */
4755 while(j < c->argc) {
4756 int leftargs = c->argc-j-1;
4757 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
4758 desc = 0;
4759 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
4760 desc = 1;
4761 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
4762 alpha = 1;
4763 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
4764 limit_start = atoi(c->argv[j+1]->ptr);
4765 limit_count = atoi(c->argv[j+2]->ptr);
4766 j+=2;
4767 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
4768 storekey = c->argv[j+1];
4769 j++;
4770 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
4771 sortby = c->argv[j+1];
4772 /* If the BY pattern does not contain '*', i.e. it is constant,
4773 * we don't need to sort nor to lookup the weight keys. */
4774 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
4775 j++;
4776 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4777 listAddNodeTail(operations,createSortOperation(
4778 REDIS_SORT_GET,c->argv[j+1]));
4779 getop++;
4780 j++;
4781 } else {
4782 decrRefCount(sortval);
4783 listRelease(operations);
4784 addReply(c,shared.syntaxerr);
4785 return;
4786 }
4787 j++;
4788 }
4789
4790 /* Load the sorting vector with all the objects to sort */
4791 vectorlen = (sortval->type == REDIS_LIST) ?
4792 listLength((list*)sortval->ptr) :
4793 dictSize((dict*)sortval->ptr);
4794 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
4795 j = 0;
4796 if (sortval->type == REDIS_LIST) {
4797 list *list = sortval->ptr;
4798 listNode *ln;
4799
4800 listRewind(list);
4801 while((ln = listYield(list))) {
4802 robj *ele = ln->value;
4803 vector[j].obj = ele;
4804 vector[j].u.score = 0;
4805 vector[j].u.cmpobj = NULL;
4806 j++;
4807 }
4808 } else {
4809 dict *set = sortval->ptr;
4810 dictIterator *di;
4811 dictEntry *setele;
4812
4813 di = dictGetIterator(set);
4814 while((setele = dictNext(di)) != NULL) {
4815 vector[j].obj = dictGetEntryKey(setele);
4816 vector[j].u.score = 0;
4817 vector[j].u.cmpobj = NULL;
4818 j++;
4819 }
4820 dictReleaseIterator(di);
4821 }
4822 assert(j == vectorlen);
4823
4824 /* Now it's time to load the right scores in the sorting vector */
4825 if (dontsort == 0) {
4826 for (j = 0; j < vectorlen; j++) {
4827 if (sortby) {
4828 robj *byval;
4829
4830 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
4831 if (!byval || byval->type != REDIS_STRING) continue;
4832 if (alpha) {
4833 if (byval->encoding == REDIS_ENCODING_RAW) {
4834 vector[j].u.cmpobj = byval;
4835 incrRefCount(byval);
4836 } else {
4837 vector[j].u.cmpobj = getDecodedObject(byval);
4838 }
4839 } else {
4840 if (byval->encoding == REDIS_ENCODING_RAW) {
4841 vector[j].u.score = strtod(byval->ptr,NULL);
4842 } else {
4843 if (byval->encoding == REDIS_ENCODING_INT) {
4844 vector[j].u.score = (long)byval->ptr;
4845 } else
4846 assert(1 != 1);
4847 }
4848 }
4849 } else {
4850 if (!alpha) {
4851 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
4852 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
4853 else {
4854 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
4855 vector[j].u.score = (long) vector[j].obj->ptr;
4856 else
4857 assert(1 != 1);
4858 }
4859 }
4860 }
4861 }
4862 }
4863
4864 /* We are ready to sort the vector... perform a bit of sanity check
4865 * on the LIMIT option too. We'll use a partial version of quicksort. */
4866 start = (limit_start < 0) ? 0 : limit_start;
4867 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
4868 if (start >= vectorlen) {
4869 start = vectorlen-1;
4870 end = vectorlen-2;
4871 }
4872 if (end >= vectorlen) end = vectorlen-1;
4873
4874 if (dontsort == 0) {
4875 server.sort_desc = desc;
4876 server.sort_alpha = alpha;
4877 server.sort_bypattern = sortby ? 1 : 0;
4878 if (sortby && (start != 0 || end != vectorlen-1))
4879 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
4880 else
4881 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
4882 }
4883
4884 /* Send command output to the output buffer, performing the specified
4885 * GET/DEL/INCR/DECR operations if any. */
4886 outputlen = getop ? getop*(end-start+1) : end-start+1;
4887 if (storekey == NULL) {
4888 /* STORE option not specified, sent the sorting result to client */
4889 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
4890 for (j = start; j <= end; j++) {
4891 listNode *ln;
4892 if (!getop) {
4893 addReplyBulkLen(c,vector[j].obj);
4894 addReply(c,vector[j].obj);
4895 addReply(c,shared.crlf);
4896 }
4897 listRewind(operations);
4898 while((ln = listYield(operations))) {
4899 redisSortOperation *sop = ln->value;
4900 robj *val = lookupKeyByPattern(c->db,sop->pattern,
4901 vector[j].obj);
4902
4903 if (sop->type == REDIS_SORT_GET) {
4904 if (!val || val->type != REDIS_STRING) {
4905 addReply(c,shared.nullbulk);
4906 } else {
4907 addReplyBulkLen(c,val);
4908 addReply(c,val);
4909 addReply(c,shared.crlf);
4910 }
4911 } else {
4912 assert(sop->type == REDIS_SORT_GET); /* always fails */
4913 }
4914 }
4915 }
4916 } else {
4917 robj *listObject = createListObject();
4918 list *listPtr = (list*) listObject->ptr;
4919
4920 /* STORE option specified, set the sorting result as a List object */
4921 for (j = start; j <= end; j++) {
4922 listNode *ln;
4923 if (!getop) {
4924 listAddNodeTail(listPtr,vector[j].obj);
4925 incrRefCount(vector[j].obj);
4926 }
4927 listRewind(operations);
4928 while((ln = listYield(operations))) {
4929 redisSortOperation *sop = ln->value;
4930 robj *val = lookupKeyByPattern(c->db,sop->pattern,
4931 vector[j].obj);
4932
4933 if (sop->type == REDIS_SORT_GET) {
4934 if (!val || val->type != REDIS_STRING) {
4935 listAddNodeTail(listPtr,createStringObject("",0));
4936 } else {
4937 listAddNodeTail(listPtr,val);
4938 incrRefCount(val);
4939 }
4940 } else {
4941 assert(sop->type == REDIS_SORT_GET); /* always fails */
4942 }
4943 }
4944 }
4945 if (dictReplace(c->db->dict,storekey,listObject)) {
4946 incrRefCount(storekey);
4947 }
4948 /* Note: we add 1 because the DB is dirty anyway since even if the
4949 * SORT result is empty a new key is set and maybe the old content
4950 * replaced. */
4951 server.dirty += 1+outputlen;
4952 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
4953 }
4954
4955 /* Cleanup */
4956 decrRefCount(sortval);
4957 listRelease(operations);
4958 for (j = 0; j < vectorlen; j++) {
4959 if (sortby && alpha && vector[j].u.cmpobj)
4960 decrRefCount(vector[j].u.cmpobj);
4961 }
4962 zfree(vector);
4963 }
4964
4965 static void infoCommand(redisClient *c) {
4966 sds info;
4967 time_t uptime = time(NULL)-server.stat_starttime;
4968 int j;
4969
4970 info = sdscatprintf(sdsempty(),
4971 "redis_version:%s\r\n"
4972 "arch_bits:%s\r\n"
4973 "uptime_in_seconds:%d\r\n"
4974 "uptime_in_days:%d\r\n"
4975 "connected_clients:%d\r\n"
4976 "connected_slaves:%d\r\n"
4977 "used_memory:%zu\r\n"
4978 "changes_since_last_save:%lld\r\n"
4979 "bgsave_in_progress:%d\r\n"
4980 "last_save_time:%d\r\n"
4981 "total_connections_received:%lld\r\n"
4982 "total_commands_processed:%lld\r\n"
4983 "role:%s\r\n"
4984 ,REDIS_VERSION,
4985 (sizeof(long) == 8) ? "64" : "32",
4986 uptime,
4987 uptime/(3600*24),
4988 listLength(server.clients)-listLength(server.slaves),
4989 listLength(server.slaves),
4990 server.usedmemory,
4991 server.dirty,
4992 server.bgsaveinprogress,
4993 server.lastsave,
4994 server.stat_numconnections,
4995 server.stat_numcommands,
4996 server.masterhost == NULL ? "master" : "slave"
4997 );
4998 if (server.masterhost) {
4999 info = sdscatprintf(info,
5000 "master_host:%s\r\n"
5001 "master_port:%d\r\n"
5002 "master_link_status:%s\r\n"
5003 "master_last_io_seconds_ago:%d\r\n"
5004 ,server.masterhost,
5005 server.masterport,
5006 (server.replstate == REDIS_REPL_CONNECTED) ?
5007 "up" : "down",
5008 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
5009 );
5010 }
5011 for (j = 0; j < server.dbnum; j++) {
5012 long long keys, vkeys;
5013
5014 keys = dictSize(server.db[j].dict);
5015 vkeys = dictSize(server.db[j].expires);
5016 if (keys || vkeys) {
5017 info = sdscatprintf(info, "db%d: keys=%lld,expires=%lld\r\n",
5018 j, keys, vkeys);
5019 }
5020 }
5021 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
5022 addReplySds(c,info);
5023 addReply(c,shared.crlf);
5024 }
5025
5026 static void monitorCommand(redisClient *c) {
5027 /* ignore MONITOR if aleady slave or in monitor mode */
5028 if (c->flags & REDIS_SLAVE) return;
5029
5030 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
5031 c->slaveseldb = 0;
5032 listAddNodeTail(server.monitors,c);
5033 addReply(c,shared.ok);
5034 }
5035
5036 /* ================================= Expire ================================= */
5037 static int removeExpire(redisDb *db, robj *key) {
5038 if (dictDelete(db->expires,key) == DICT_OK) {
5039 return 1;
5040 } else {
5041 return 0;
5042 }
5043 }
5044
5045 static int setExpire(redisDb *db, robj *key, time_t when) {
5046 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
5047 return 0;
5048 } else {
5049 incrRefCount(key);
5050 return 1;
5051 }
5052 }
5053
5054 /* Return the expire time of the specified key, or -1 if no expire
5055 * is associated with this key (i.e. the key is non volatile) */
5056 static time_t getExpire(redisDb *db, robj *key) {
5057 dictEntry *de;
5058
5059 /* No expire? return ASAP */
5060 if (dictSize(db->expires) == 0 ||
5061 (de = dictFind(db->expires,key)) == NULL) return -1;
5062
5063 return (time_t) dictGetEntryVal(de);
5064 }
5065
5066 static int expireIfNeeded(redisDb *db, robj *key) {
5067 time_t when;
5068 dictEntry *de;
5069
5070 /* No expire? return ASAP */
5071 if (dictSize(db->expires) == 0 ||
5072 (de = dictFind(db->expires,key)) == NULL) return 0;
5073
5074 /* Lookup the expire */
5075 when = (time_t) dictGetEntryVal(de);
5076 if (time(NULL) <= when) return 0;
5077
5078 /* Delete the key */
5079 dictDelete(db->expires,key);
5080 return dictDelete(db->dict,key) == DICT_OK;
5081 }
5082
5083 static int deleteIfVolatile(redisDb *db, robj *key) {
5084 dictEntry *de;
5085
5086 /* No expire? return ASAP */
5087 if (dictSize(db->expires) == 0 ||
5088 (de = dictFind(db->expires,key)) == NULL) return 0;
5089
5090 /* Delete the key */
5091 server.dirty++;
5092 dictDelete(db->expires,key);
5093 return dictDelete(db->dict,key) == DICT_OK;
5094 }
5095
5096 static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
5097 dictEntry *de;
5098
5099 de = dictFind(c->db->dict,key);
5100 if (de == NULL) {
5101 addReply(c,shared.czero);
5102 return;
5103 }
5104 if (seconds < 0) {
5105 if (deleteKey(c->db,key)) server.dirty++;
5106 addReply(c, shared.cone);
5107 return;
5108 } else {
5109 time_t when = time(NULL)+seconds;
5110 if (setExpire(c->db,key,when)) {
5111 addReply(c,shared.cone);
5112 server.dirty++;
5113 } else {
5114 addReply(c,shared.czero);
5115 }
5116 return;
5117 }
5118 }
5119
5120 static void expireCommand(redisClient *c) {
5121 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
5122 }
5123
5124 static void expireatCommand(redisClient *c) {
5125 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
5126 }
5127
5128 static void ttlCommand(redisClient *c) {
5129 time_t expire;
5130 int ttl = -1;
5131
5132 expire = getExpire(c->db,c->argv[1]);
5133 if (expire != -1) {
5134 ttl = (int) (expire-time(NULL));
5135 if (ttl < 0) ttl = -1;
5136 }
5137 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
5138 }
5139
5140 /* =============================== Replication ============================= */
5141
5142 static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
5143 ssize_t nwritten, ret = size;
5144 time_t start = time(NULL);
5145
5146 timeout++;
5147 while(size) {
5148 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
5149 nwritten = write(fd,ptr,size);
5150 if (nwritten == -1) return -1;
5151 ptr += nwritten;
5152 size -= nwritten;
5153 }
5154 if ((time(NULL)-start) > timeout) {
5155 errno = ETIMEDOUT;
5156 return -1;
5157 }
5158 }
5159 return ret;
5160 }
5161
5162 static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
5163 ssize_t nread, totread = 0;
5164 time_t start = time(NULL);
5165
5166 timeout++;
5167 while(size) {
5168 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
5169 nread = read(fd,ptr,size);
5170 if (nread == -1) return -1;
5171 ptr += nread;
5172 size -= nread;
5173 totread += nread;
5174 }
5175 if ((time(NULL)-start) > timeout) {
5176 errno = ETIMEDOUT;
5177 return -1;
5178 }
5179 }
5180 return totread;
5181 }
5182
/* Read a line from 'fd' one byte at a time, storing it as a nul terminated
 * string into 'ptr', a buffer of 'size' bytes.
 *
 * Reading stops at the first '\n', which is not stored; a '\r' immediately
 * before it is replaced by the terminator. At most size-1 bytes are stored,
 * so an over-long line is returned truncated instead of overflowing the
 * buffer.
 *
 * Returns the number of bytes stored before the newline (a stripped '\r' is
 * still counted), or -1 if syncRead() fails. 'timeout' is passed to
 * syncRead() for every single byte. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve one byte for the nul terminator. */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        size--; /* One less byte of room left in the buffer. */
    }
    return nread;
}
5203
5204 static void syncCommand(redisClient *c) {
5205 /* ignore SYNC if aleady slave or in monitor mode */
5206 if (c->flags & REDIS_SLAVE) return;
5207
5208 /* SYNC can't be issued when the server has pending data to send to
5209 * the client about already issued commands. We need a fresh reply
5210 * buffer registering the differences between the BGSAVE and the current
5211 * dataset, so that we can copy to other slaves if needed. */
5212 if (listLength(c->reply) != 0) {
5213 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
5214 return;
5215 }
5216
5217 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
5218 /* Here we need to check if there is a background saving operation
5219 * in progress, or if it is required to start one */
5220 if (server.bgsaveinprogress) {
5221 /* Ok a background save is in progress. Let's check if it is a good
5222 * one for replication, i.e. if there is another slave that is
5223 * registering differences since the server forked to save */
5224 redisClient *slave;
5225 listNode *ln;
5226
5227 listRewind(server.slaves);
5228 while((ln = listYield(server.slaves))) {
5229 slave = ln->value;
5230 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
5231 }
5232 if (ln) {
5233 /* Perfect, the server is already registering differences for
5234 * another slave. Set the right state, and copy the buffer. */
5235 listRelease(c->reply);
5236 c->reply = listDup(slave->reply);
5237 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5238 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
5239 } else {
5240 /* No way, we need to wait for the next BGSAVE in order to
5241 * register differences */
5242 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5243 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5244 }
5245 } else {
5246 /* Ok we don't have a BGSAVE in progress, let's start one */
5247 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
5248 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5249 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
5250 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
5251 return;
5252 }
5253 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5254 }
5255 c->repldbfd = -1;
5256 c->flags |= REDIS_SLAVE;
5257 c->slaveseldb = 0;
5258 listAddNodeTail(server.slaves,c);
5259 return;
5260 }
5261
5262 static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
5263 redisClient *slave = privdata;
5264 REDIS_NOTUSED(el);
5265 REDIS_NOTUSED(mask);
5266 char buf[REDIS_IOBUF_LEN];
5267 ssize_t nwritten, buflen;
5268
5269 if (slave->repldboff == 0) {
5270 /* Write the bulk write count before to transfer the DB. In theory here
5271 * we don't know how much room there is in the output buffer of the
5272 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
5273 * operations) will never be smaller than the few bytes we need. */
5274 sds bulkcount;
5275
5276 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
5277 slave->repldbsize);
5278 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
5279 {
5280 sdsfree(bulkcount);
5281 freeClient(slave);
5282 return;
5283 }
5284 sdsfree(bulkcount);
5285 }
5286 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
5287 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
5288 if (buflen <= 0) {
5289 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
5290 (buflen == 0) ? "premature EOF" : strerror(errno));
5291 freeClient(slave);
5292 return;
5293 }
5294 if ((nwritten = write(fd,buf,buflen)) == -1) {
5295 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
5296 strerror(errno));
5297 freeClient(slave);
5298 return;
5299 }
5300 slave->repldboff += nwritten;
5301 if (slave->repldboff == slave->repldbsize) {
5302 close(slave->repldbfd);
5303 slave->repldbfd = -1;
5304 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5305 slave->replstate = REDIS_REPL_ONLINE;
5306 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
5307 sendReplyToClient, slave) == AE_ERR) {
5308 freeClient(slave);
5309 return;
5310 }
5311 addReplySds(slave,sdsempty());
5312 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
5313 }
5314 }
5315
5316 /* This function is called at the end of every backgrond saving.
5317 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
5318 * otherwise REDIS_ERR is passed to the function.
5319 *
5320 * The goal of this function is to handle slaves waiting for a successful
5321 * background saving in order to perform non-blocking synchronization. */
5322 static void updateSlavesWaitingBgsave(int bgsaveerr) {
5323 listNode *ln;
5324 int startbgsave = 0;
5325
5326 listRewind(server.slaves);
5327 while((ln = listYield(server.slaves))) {
5328 redisClient *slave = ln->value;
5329
5330 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
5331 startbgsave = 1;
5332 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5333 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
5334 struct redis_stat buf;
5335
5336 if (bgsaveerr != REDIS_OK) {
5337 freeClient(slave);
5338 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
5339 continue;
5340 }
5341 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
5342 redis_fstat(slave->repldbfd,&buf) == -1) {
5343 freeClient(slave);
5344 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
5345 continue;
5346 }
5347 slave->repldboff = 0;
5348 slave->repldbsize = buf.st_size;
5349 slave->replstate = REDIS_REPL_SEND_BULK;
5350 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5351 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
5352 freeClient(slave);
5353 continue;
5354 }
5355 }
5356 }
5357 if (startbgsave) {
5358 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5359 listRewind(server.slaves);
5360 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
5361 while((ln = listYield(server.slaves))) {
5362 redisClient *slave = ln->value;
5363
5364 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
5365 freeClient(slave);
5366 }
5367 }
5368 }
5369 }
5370
5371 static int syncWithMaster(void) {
5372 char buf[1024], tmpfile[256], authcmd[1024];
5373 int dumpsize;
5374 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
5375 int dfd;
5376
5377 if (fd == -1) {
5378 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
5379 strerror(errno));
5380 return REDIS_ERR;
5381 }
5382
5383 /* AUTH with the master if required. */
5384 if(server.masterauth) {
5385 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
5386 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
5387 close(fd);
5388 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
5389 strerror(errno));
5390 return REDIS_ERR;
5391 }
5392 /* Read the AUTH result. */
5393 if (syncReadLine(fd,buf,1024,3600) == -1) {
5394 close(fd);
5395 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
5396 strerror(errno));
5397 return REDIS_ERR;
5398 }
5399 if (buf[0] != '+') {
5400 close(fd);
5401 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
5402 return REDIS_ERR;
5403 }
5404 }
5405
5406 /* Issue the SYNC command */
5407 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
5408 close(fd);
5409 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
5410 strerror(errno));
5411 return REDIS_ERR;
5412 }
5413 /* Read the bulk write count */
5414 if (syncReadLine(fd,buf,1024,3600) == -1) {
5415 close(fd);
5416 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
5417 strerror(errno));
5418 return REDIS_ERR;
5419 }
5420 if (buf[0] != '$') {
5421 close(fd);
5422 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
5423 return REDIS_ERR;
5424 }
5425 dumpsize = atoi(buf+1);
5426 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
5427 /* Read the bulk write data on a temp file */
5428 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
5429 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
5430 if (dfd == -1) {
5431 close(fd);
5432 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
5433 return REDIS_ERR;
5434 }
5435 while(dumpsize) {
5436 int nread, nwritten;
5437
5438 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
5439 if (nread == -1) {
5440 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
5441 strerror(errno));
5442 close(fd);
5443 close(dfd);
5444 return REDIS_ERR;
5445 }
5446 nwritten = write(dfd,buf,nread);
5447 if (nwritten == -1) {
5448 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
5449 close(fd);
5450 close(dfd);
5451 return REDIS_ERR;
5452 }
5453 dumpsize -= nread;
5454 }
5455 close(dfd);
5456 if (rename(tmpfile,server.dbfilename) == -1) {
5457 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
5458 unlink(tmpfile);
5459 close(fd);
5460 return REDIS_ERR;
5461 }
5462 emptyDb();
5463 if (rdbLoad(server.dbfilename) != REDIS_OK) {
5464 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
5465 close(fd);
5466 return REDIS_ERR;
5467 }
5468 server.master = createClient(fd);
5469 server.master->flags |= REDIS_MASTER;
5470 server.replstate = REDIS_REPL_CONNECTED;
5471 return REDIS_OK;
5472 }
5473
5474 static void slaveofCommand(redisClient *c) {
5475 if (!strcasecmp(c->argv[1]->ptr,"no") &&
5476 !strcasecmp(c->argv[2]->ptr,"one")) {
5477 if (server.masterhost) {
5478 sdsfree(server.masterhost);
5479 server.masterhost = NULL;
5480 if (server.master) freeClient(server.master);
5481 server.replstate = REDIS_REPL_NONE;
5482 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
5483 }
5484 } else {
5485 sdsfree(server.masterhost);
5486 server.masterhost = sdsdup(c->argv[1]->ptr);
5487 server.masterport = atoi(c->argv[2]->ptr);
5488 if (server.master) freeClient(server.master);
5489 server.replstate = REDIS_REPL_CONNECT;
5490 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
5491 server.masterhost, server.masterport);
5492 }
5493 addReply(c,shared.ok);
5494 }
5495
5496 /* ============================ Maxmemory directive ======================== */
5497
5498 /* This function gets called when 'maxmemory' is set on the config file to limit
5499 * the max memory used by the server, and we are out of memory.
5500 * This function will try to, in order:
5501 *
5502 * - Free objects from the free list
5503 * - Try to remove keys with an EXPIRE set
5504 *
5505 * It is not possible to free enough memory to reach used-memory < maxmemory
5506 * the server will start refusing commands that will enlarge even more the
5507 * memory usage.
5508 */
5509 static void freeMemoryIfNeeded(void) {
5510 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
5511 if (listLength(server.objfreelist)) {
5512 robj *o;
5513
5514 listNode *head = listFirst(server.objfreelist);
5515 o = listNodeValue(head);
5516 listDelNode(server.objfreelist,head);
5517 zfree(o);
5518 } else {
5519 int j, k, freed = 0;
5520
5521 for (j = 0; j < server.dbnum; j++) {
5522 int minttl = -1;
5523 robj *minkey = NULL;
5524 struct dictEntry *de;
5525
5526 if (dictSize(server.db[j].expires)) {
5527 freed = 1;
5528 /* From a sample of three keys drop the one nearest to
5529 * the natural expire */
5530 for (k = 0; k < 3; k++) {
5531 time_t t;
5532
5533 de = dictGetRandomKey(server.db[j].expires);
5534 t = (time_t) dictGetEntryVal(de);
5535 if (minttl == -1 || t < minttl) {
5536 minkey = dictGetEntryKey(de);
5537 minttl = t;
5538 }
5539 }
5540 deleteKey(server.db+j,minkey);
5541 }
5542 }
5543 if (!freed) return; /* nothing to free... */
5544 }
5545 }
5546 }
5547
5548 /* ============================== Append Only file ========================== */
5549
5550 static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
5551 sds buf = sdsempty();
5552 int j;
5553 ssize_t nwritten;
5554 time_t now;
5555 robj *tmpargv[3];
5556
5557 /* The DB this command was targetting is not the same as the last command
5558 * we appendend. To issue a SELECT command is needed. */
5559 if (dictid != server.appendseldb) {
5560 char seldb[64];
5561
5562 snprintf(seldb,sizeof(seldb),"%d",dictid);
5563 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
5564 strlen(seldb),seldb);
5565 server.appendseldb = dictid;
5566 }
5567
5568 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
5569 * EXPIREs into EXPIREATs calls */
5570 if (cmd->proc == expireCommand) {
5571 long when;
5572
5573 tmpargv[0] = createStringObject("EXPIREAT",8);
5574 tmpargv[1] = argv[1];
5575 incrRefCount(argv[1]);
5576 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
5577 tmpargv[2] = createObject(REDIS_STRING,
5578 sdscatprintf(sdsempty(),"%ld",when));
5579 argv = tmpargv;
5580 }
5581
5582 /* Append the actual command */
5583 buf = sdscatprintf(buf,"*%d\r\n",argc);
5584 for (j = 0; j < argc; j++) {
5585 robj *o = argv[j];
5586
5587 if (o->encoding != REDIS_ENCODING_RAW)
5588 o = getDecodedObject(o);
5589 buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr));
5590 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
5591 buf = sdscatlen(buf,"\r\n",2);
5592 if (o != argv[j])
5593 decrRefCount(o);
5594 }
5595
5596 /* Free the objects from the modified argv for EXPIREAT */
5597 if (cmd->proc == expireCommand) {
5598 for (j = 0; j < 3; j++)
5599 decrRefCount(argv[j]);
5600 }
5601
5602 /* We want to perform a single write. This should be guaranteed atomic
5603 * at least if the filesystem we are writing is a real physical one.
5604 * While this will save us against the server being killed I don't think
5605 * there is much to do about the whole server stopping for power problems
5606 * or alike */
5607 nwritten = write(server.appendfd,buf,sdslen(buf));
5608 if (nwritten != (signed)sdslen(buf)) {
5609 /* Ooops, we are in troubles. The best thing to do for now is
5610 * to simply exit instead to give the illusion that everything is
5611 * working as expected. */
5612 if (nwritten == -1) {
5613 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
5614 } else {
5615 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
5616 }
5617 exit(1);
5618 }
5619 now = time(NULL);
5620 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
5621 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
5622 now-server.lastfsync > 1))
5623 {
5624 fsync(server.appendfd); /* Let's try to get this data on the disk */
5625 server.lastfsync = now;
5626 }
5627 }
5628
5629 /* In Redis commands are always executed in the context of a client, so in
5630 * order to load the append only file we need to create a fake client. */
5631 static struct redisClient *createFakeClient(void) {
5632 struct redisClient *c = zmalloc(sizeof(*c));
5633
5634 selectDb(c,0);
5635 c->fd = -1;
5636 c->querybuf = sdsempty();
5637 c->argc = 0;
5638 c->argv = NULL;
5639 c->flags = 0;
5640 /* We set the fake client as a slave waiting for the synchronization
5641 * so that Redis will not try to send replies to this client. */
5642 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5643 c->reply = listCreate();
5644 listSetFreeMethod(c->reply,decrRefCount);
5645 listSetDupMethod(c->reply,dupClientReplyValue);
5646 return c;
5647 }
5648
5649 static void freeFakeClient(struct redisClient *c) {
5650 sdsfree(c->querybuf);
5651 listRelease(c->reply);
5652 zfree(c);
5653 }
5654
5655 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
5656 * error (the append only file is zero-length) REDIS_ERR is returned. On
5657 * fatal error an error message is logged and the program exists. */
5658 int loadAppendOnlyFile(char *filename) {
5659 struct redisClient *fakeClient;
5660 FILE *fp = fopen(filename,"r");
5661 struct redis_stat sb;
5662
5663 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
5664 return REDIS_ERR;
5665
5666 if (fp == NULL) {
5667 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
5668 exit(1);
5669 }
5670
5671 fakeClient = createFakeClient();
5672 while(1) {
5673 int argc, j;
5674 unsigned long len;
5675 robj **argv;
5676 char buf[128];
5677 sds argsds;
5678 struct redisCommand *cmd;
5679
5680 if (fgets(buf,sizeof(buf),fp) == NULL) {
5681 if (feof(fp))
5682 break;
5683 else
5684 goto readerr;
5685 }
5686 if (buf[0] != '*') goto fmterr;
5687 argc = atoi(buf+1);
5688 argv = zmalloc(sizeof(robj*)*argc);
5689 for (j = 0; j < argc; j++) {
5690 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
5691 if (buf[0] != '$') goto fmterr;
5692 len = strtol(buf+1,NULL,10);
5693 argsds = sdsnewlen(NULL,len);
5694 if (fread(argsds,len,1,fp) == 0) goto fmterr;
5695 argv[j] = createObject(REDIS_STRING,argsds);
5696 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
5697 }
5698
5699 /* Command lookup */
5700 cmd = lookupCommand(argv[0]->ptr);
5701 if (!cmd) {
5702 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
5703 exit(1);
5704 }
5705 /* Try object sharing and encoding */
5706 if (server.shareobjects) {
5707 int j;
5708 for(j = 1; j < argc; j++)
5709 argv[j] = tryObjectSharing(argv[j]);
5710 }
5711 if (cmd->flags & REDIS_CMD_BULK)
5712 tryObjectEncoding(argv[argc-1]);
5713 /* Run the command in the context of a fake client */
5714 fakeClient->argc = argc;
5715 fakeClient->argv = argv;
5716 cmd->proc(fakeClient);
5717 /* Discard the reply objects list from the fake client */
5718 while(listLength(fakeClient->reply))
5719 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
5720 /* Clean up, ready for the next command */
5721 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
5722 zfree(argv);
5723 }
5724 fclose(fp);
5725 freeFakeClient(fakeClient);
5726 return REDIS_OK;
5727
5728 readerr:
5729 if (feof(fp)) {
5730 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
5731 } else {
5732 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
5733 }
5734 exit(1);
5735 fmterr:
5736 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
5737 exit(1);
5738 }
5739
5740 /* ================================= Debugging ============================== */
5741
5742 static void debugCommand(redisClient *c) {
5743 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
5744 *((char*)-1) = 'x';
5745 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
5746 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
5747 robj *key, *val;
5748
5749 if (!de) {
5750 addReply(c,shared.nokeyerr);
5751 return;
5752 }
5753 key = dictGetEntryKey(de);
5754 val = dictGetEntryVal(de);
5755 addReplySds(c,sdscatprintf(sdsempty(),
5756 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
5757 key, key->refcount, val, val->refcount, val->encoding));
5758 } else {
5759 addReplySds(c,sdsnew(
5760 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>]\r\n"));
5761 }
5762 }
5763
5764 /* =================================== Main! ================================ */
5765
5766 #ifdef __linux__
/* Return the integer stored in /proc/sys/vm/overcommit_memory, or -1 if the
 * file can't be opened or its first line can't be read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,sizeof(line),fp) != NULL)
        value = atoi(line);
    fclose(fp);
    return value;
}
5780
5781 void linuxOvercommitMemoryWarning(void) {
5782 if (linuxOvercommitMemoryValue() == 0) {
5783 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
5784 }
5785 }
5786 #endif /* __linux__ */
5787
5788 static void daemonize(void) {
5789 int fd;
5790 FILE *fp;
5791
5792 if (fork() != 0) exit(0); /* parent exits */
5793 setsid(); /* create a new session */
5794
5795 /* Every output goes to /dev/null. If Redis is daemonized but
5796 * the 'logfile' is set to 'stdout' in the configuration file
5797 * it will not log at all. */
5798 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
5799 dup2(fd, STDIN_FILENO);
5800 dup2(fd, STDOUT_FILENO);
5801 dup2(fd, STDERR_FILENO);
5802 if (fd > STDERR_FILENO) close(fd);
5803 }
5804 /* Try to write the pid file */
5805 fp = fopen(server.pidfile,"w");
5806 if (fp) {
5807 fprintf(fp,"%d\n",getpid());
5808 fclose(fp);
5809 }
5810 }
5811
5812 int main(int argc, char **argv) {
5813 initServerConfig();
5814 if (argc == 2) {
5815 resetServerSaveParams();
5816 loadServerConfig(argv[1]);
5817 } else if (argc > 2) {
5818 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
5819 exit(1);
5820 } else {
5821 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
5822 }
5823 initServer();
5824 if (server.daemonize) daemonize();
5825 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
5826 #ifdef __linux__
5827 linuxOvercommitMemoryWarning();
5828 #endif
5829 if (server.appendonly) {
5830 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
5831 redisLog(REDIS_NOTICE,"DB loaded from append only file");
5832 } else {
5833 if (rdbLoad(server.dbfilename) == REDIS_OK)
5834 redisLog(REDIS_NOTICE,"DB loaded from disk");
5835 }
5836 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
5837 acceptHandler, NULL) == AE_ERR) oom("creating file event");
5838 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
5839 aeMain(server.el);
5840 aeDeleteEventLoop(server.el);
5841 return 0;
5842 }
5843
5844 /* ============================= Backtrace support ========================= */
5845
5846 #ifdef HAVE_BACKTRACE
5847 static char *findFuncName(void *pointer, unsigned long *offset);
5848
/* Return the instruction pointer saved in the signal context 'uc', or NULL
 * on platforms not covered by the conditionals below.
 *
 * NOTE(review): GCC defines __x86_64__ (lowercase), so the __X86_64__ test
 * below is probably never true and 64 bit Linux builds fall into the NULL
 * branch -- confirm before changing it, REG_EIP does not exist there. */
static void *getMcontextEip(ucontext_t *uc) {
    void *ip = NULL;

#if defined(__FreeBSD__)
    ip = (void*) uc->uc_mcontext.mc_eip;
#elif defined(__dietlibc__)
    ip = (void*) uc->uc_mcontext.eip;
#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
    ip = (void*) uc->uc_mcontext->__ss.__eip;
#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
  #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
    ip = (void*) uc->uc_mcontext->__ss.__rip;
  #else
    ip = (void*) uc->uc_mcontext->__ss.__eip;
  #endif
#elif defined(__i386__) || defined(__X86_64__) /* Linux x86 */
    ip = (void*) uc->uc_mcontext.gregs[REG_EIP];
#elif defined(__ia64__) /* Linux IA64 */
    ip = (void*) uc->uc_mcontext.sc_ip;
#endif
    return ip;
}
5870
5871 static void segvHandler(int sig, siginfo_t *info, void *secret) {
5872 void *trace[100];
5873 char **messages = NULL;
5874 int i, trace_size = 0;
5875 unsigned long offset=0;
5876 time_t uptime = time(NULL)-server.stat_starttime;
5877 ucontext_t *uc = (ucontext_t*) secret;
5878 REDIS_NOTUSED(info);
5879
5880 redisLog(REDIS_WARNING,
5881 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
5882 redisLog(REDIS_WARNING, "%s", sdscatprintf(sdsempty(),
5883 "redis_version:%s; "
5884 "uptime_in_seconds:%d; "
5885 "connected_clients:%d; "
5886 "connected_slaves:%d; "
5887 "used_memory:%zu; "
5888 "changes_since_last_save:%lld; "
5889 "bgsave_in_progress:%d; "
5890 "last_save_time:%d; "
5891 "total_connections_received:%lld; "
5892 "total_commands_processed:%lld; "
5893 "role:%s;"
5894 ,REDIS_VERSION,
5895 uptime,
5896 listLength(server.clients)-listLength(server.slaves),
5897 listLength(server.slaves),
5898 server.usedmemory,
5899 server.dirty,
5900 server.bgsaveinprogress,
5901 server.lastsave,
5902 server.stat_numconnections,
5903 server.stat_numcommands,
5904 server.masterhost == NULL ? "master" : "slave"
5905 ));
5906
5907 trace_size = backtrace(trace, 100);
5908 /* overwrite sigaction with caller's address */
5909 if (getMcontextEip(uc) != NULL) {
5910 trace[1] = getMcontextEip(uc);
5911 }
5912 messages = backtrace_symbols(trace, trace_size);
5913
5914 for (i=1; i<trace_size; ++i) {
5915 char *fn = findFuncName(trace[i], &offset), *p;
5916
5917 p = strchr(messages[i],'+');
5918 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
5919 redisLog(REDIS_WARNING,"%s", messages[i]);
5920 } else {
5921 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
5922 }
5923 }
5924 free(messages);
5925 exit(0);
5926 }
5927
5928 static void setupSigSegvAction(void) {
5929 struct sigaction act;
5930
5931 sigemptyset (&act.sa_mask);
5932 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
5933 * is used. Otherwise, sa_handler is used */
5934 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
5935 act.sa_sigaction = segvHandler;
5936 sigaction (SIGSEGV, &act, NULL);
5937 sigaction (SIGBUS, &act, NULL);
5938 sigaction (SIGFPE, &act, NULL);
5939 sigaction (SIGILL, &act, NULL);
5940 sigaction (SIGBUS, &act, NULL);
5941 return;
5942 }
5943
5944 #include "staticsymbols.h"
5945 /* This function try to convert a pointer into a function name. It's used in
5946 * oreder to provide a backtrace under segmentation fault that's able to
5947 * display functions declared as static (otherwise the backtrace is useless). */
5948 static char *findFuncName(void *pointer, unsigned long *offset){
5949 int i, ret = -1;
5950 unsigned long off, minoff = 0;
5951
5952 /* Try to match against the Symbol with the smallest offset */
5953 for (i=0; symsTable[i].pointer; i++) {
5954 unsigned long lp = (unsigned long) pointer;
5955
5956 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
5957 off=lp-symsTable[i].pointer;
5958 if (ret < 0 || off < minoff) {
5959 minoff=off;
5960 ret=i;
5961 }
5962 }
5963 }
5964 if (ret == -1) return NULL;
5965 *offset = minoff;
5966 return symsTable[ret].name;
5967 }
5968 #else /* HAVE_BACKTRACE */
/* Backtrace support is not compiled in: no signal handler is installed. */
static void setupSigSegvAction(void) {
    return;
}
5971 #endif /* HAVE_BACKTRACE */
5972
5973
5974
5975 /* The End */
5976
5977
5978