]> git.saurik.com Git - redis.git/blob - src/redis.c
Set fd to writable when poll(2) detects POLLERR or POLLHUP event.
[redis.git] / src / redis.c
1 /*
2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "redis.h"
31 #include "slowlog.h"
32 #include "bio.h"
33
34 #include <time.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <ctype.h>
40 #include <stdarg.h>
41 #include <arpa/inet.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <sys/time.h>
45 #include <sys/resource.h>
46 #include <sys/uio.h>
47 #include <limits.h>
48 #include <float.h>
49 #include <math.h>
50 #include <sys/resource.h>
51 #include <sys/utsname.h>
52
53 /* Our shared "common" objects */
54
55 struct sharedObjectsStruct shared;
56
57 /* Global vars that are actually used as constants. The following double
58 * values are used for double on-disk serialization, and are initialized
59 * at runtime to avoid strange compiler optimizations. */
60
61 double R_Zero, R_PosInf, R_NegInf, R_Nan;
62
63 /*================================= Globals ================================= */
64
65 /* Global vars */
66 struct redisServer server; /* server global state */
67 struct redisCommand *commandTable;
68
69 /* Our command table.
70 *
71 * Every entry is composed of the following fields:
72 *
73 * name: a string representing the command name.
74 * function: pointer to the C function implementing the command.
75 * arity: number of arguments, it is possible to use -N to say >= N
76 * sflags: command flags as string. See below for a table of flags.
77 * flags: flags as bitmask. Computed by Redis using the 'sflags' field.
78 * get_keys_proc: an optional function to get key arguments from a command.
79 * This is only used when the following three fields are not
80 * enough to specify what arguments are keys.
81 * first_key_index: first argument that is a key
82 * last_key_index: last argument that is a key
83 * key_step: step to get all the keys from first to last argument. For instance
84 * in MSET the step is two since arguments are key,val,key,val,...
85 * microseconds: microseconds of total execution time for this command.
86 * calls: total number of calls of this command.
87 *
88 * The flags, microseconds and calls fields are computed by Redis and should
89 * always be set to zero.
90 *
91 * Command flags are expressed using strings where every character represents
92 * a flag. Later the populateCommandTable() function will take care of
93 * populating the real 'flags' field using this characters.
94 *
95 * This is the meaning of the flags:
96 *
97 * w: write command (may modify the key space).
98 * r: read command (will never modify the key space).
99 * m: may increase memory usage once called. Don't allow if out of memory.
100 * a: admin command, like SAVE or SHUTDOWN.
101 * p: Pub/Sub related command.
102 * f: force replication of this command, regarless of server.dirty.
103 * s: command not allowed in scripts.
104 * R: random command. Command is not deterministic, that is, the same command
105 * with the same arguments, with the same key space, may have different
106 * results. For instance SPOP and RANDOMKEY are two random commands.
107 * S: Sort command output array if called from script, so that the output
108 * is deterministic.
109 */
110 struct redisCommand redisCommandTable[] = {
111 {"get",getCommand,2,"r",0,NULL,1,1,1,0,0},
112 {"set",setCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
113 {"setnx",setnxCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
114 {"setex",setexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
115 {"psetex",psetexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
116 {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
117 {"strlen",strlenCommand,2,"r",0,NULL,1,1,1,0,0},
118 {"del",delCommand,-2,"w",0,noPreloadGetKeys,1,-1,1,0,0},
119 {"exists",existsCommand,2,"r",0,NULL,1,1,1,0,0},
120 {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
121 {"getbit",getbitCommand,3,"r",0,NULL,1,1,1,0,0},
122 {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
123 {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
124 {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
125 {"incr",incrCommand,2,"wm",0,NULL,1,1,1,0,0},
126 {"decr",decrCommand,2,"wm",0,NULL,1,1,1,0,0},
127 {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
128 {"rpush",rpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
129 {"lpush",lpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
130 {"rpushx",rpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
131 {"lpushx",lpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
132 {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
133 {"rpop",rpopCommand,2,"w",0,NULL,1,1,1,0,0},
134 {"lpop",lpopCommand,2,"w",0,NULL,1,1,1,0,0},
135 {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
136 {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
137 {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
138 {"llen",llenCommand,2,"r",0,NULL,1,1,1,0,0},
139 {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
140 {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
141 {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
142 {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
143 {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
144 {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
145 {"sadd",saddCommand,-3,"wm",0,NULL,1,1,1,0,0},
146 {"srem",sremCommand,-3,"w",0,NULL,1,1,1,0,0},
147 {"smove",smoveCommand,4,"w",0,NULL,1,2,1,0,0},
148 {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
149 {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
150 {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
151 {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
152 {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
153 {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
154 {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
155 {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
156 {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
157 {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
158 {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
159 {"zadd",zaddCommand,-4,"wm",0,NULL,1,1,1,0,0},
160 {"zincrby",zincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
161 {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0},
162 {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
163 {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
164 {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
165 {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
166 {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
167 {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
168 {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
169 {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0},
170 {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
171 {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0},
172 {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0},
173 {"zrank",zrankCommand,3,"r",0,NULL,1,1,1,0,0},
174 {"zrevrank",zrevrankCommand,3,"r",0,NULL,1,1,1,0,0},
175 {"hset",hsetCommand,4,"wm",0,NULL,1,1,1,0,0},
176 {"hsetnx",hsetnxCommand,4,"wm",0,NULL,1,1,1,0,0},
177 {"hget",hgetCommand,3,"r",0,NULL,1,1,1,0,0},
178 {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
179 {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
180 {"hincrby",hincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
181 {"hincrbyfloat",hincrbyfloatCommand,4,"wm",0,NULL,1,1,1,0,0},
182 {"hdel",hdelCommand,-3,"w",0,NULL,1,1,1,0,0},
183 {"hlen",hlenCommand,2,"r",0,NULL,1,1,1,0,0},
184 {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
185 {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
186 {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
187 {"hexists",hexistsCommand,3,"r",0,NULL,1,1,1,0,0},
188 {"incrby",incrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
189 {"decrby",decrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
190 {"incrbyfloat",incrbyfloatCommand,3,"wm",0,NULL,1,1,1,0,0},
191 {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
192 {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
193 {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
194 {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
195 {"select",selectCommand,2,"r",0,NULL,0,0,0,0,0},
196 {"move",moveCommand,3,"w",0,NULL,1,1,1,0,0},
197 {"rename",renameCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
198 {"renamenx",renamenxCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
199 {"expire",expireCommand,3,"w",0,NULL,1,1,1,0,0},
200 {"expireat",expireatCommand,3,"w",0,NULL,1,1,1,0,0},
201 {"pexpire",pexpireCommand,3,"w",0,NULL,1,1,1,0,0},
202 {"pexpireat",pexpireatCommand,3,"w",0,NULL,1,1,1,0,0},
203 {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
204 {"dbsize",dbsizeCommand,1,"r",0,NULL,0,0,0,0,0},
205 {"auth",authCommand,2,"rs",0,NULL,0,0,0,0,0},
206 {"ping",pingCommand,1,"r",0,NULL,0,0,0,0,0},
207 {"echo",echoCommand,2,"r",0,NULL,0,0,0,0,0},
208 {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
209 {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
210 {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
211 {"shutdown",shutdownCommand,-1,"ar",0,NULL,0,0,0,0,0},
212 {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0},
213 {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0},
214 {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0},
215 {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0},
216 {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
217 {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
218 {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
219 {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
220 {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0},
221 {"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0},
222 {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
223 {"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0},
224 {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0},
225 {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0},
226 {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0},
227 {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
228 {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0},
229 {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
230 {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
231 {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
232 {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
233 {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0},
234 {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0},
235 {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0},
236 {"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0},
237 {"migrate",migrateCommand,6,"aw",0,NULL,0,0,0,0,0},
238 {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
239 {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0},
240 {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0},
241 {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
242 {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
243 {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
244 {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
245 {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0}
246 };
247
248 /*============================ Utility functions ============================ */
249
250 /* Low level logging. To use only for very big messages, otherwise
251 * redisLog() is to prefer. */
252 void redisLogRaw(int level, const char *msg) {
253 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
254 const char *c = ".-*#";
255 FILE *fp;
256 char buf[64];
257 int rawmode = (level & REDIS_LOG_RAW);
258
259 level &= 0xff; /* clear flags */
260 if (level < server.verbosity) return;
261
262 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
263 if (!fp) return;
264
265 if (rawmode) {
266 fprintf(fp,"%s",msg);
267 } else {
268 int off;
269 struct timeval tv;
270
271 gettimeofday(&tv,NULL);
272 off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
273 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
274 fprintf(fp,"[%d] %s %c %s\n",(int)getpid(),buf,c[level],msg);
275 }
276 fflush(fp);
277
278 if (server.logfile) fclose(fp);
279
280 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
281 }
282
283 /* Like redisLogRaw() but with printf-alike support. This is the funciton that
284 * is used across the code. The raw version is only used in order to dump
285 * the INFO output on crash. */
286 void redisLog(int level, const char *fmt, ...) {
287 va_list ap;
288 char msg[REDIS_MAX_LOGMSG_LEN];
289
290 if ((level&0xff) < server.verbosity) return;
291
292 va_start(ap, fmt);
293 vsnprintf(msg, sizeof(msg), fmt, ap);
294 va_end(ap);
295
296 redisLogRaw(level,msg);
297 }
298
299 /* Log a fixed message without printf-alike capabilities, in a way that is
300 * safe to call from a signal handler.
301 *
302 * We actually use this only for signals that are not fatal from the point
303 * of view of Redis. Signals that are going to kill the server anyway and
304 * where we need printf-alike features are served by redisLog(). */
305 void redisLogFromHandler(int level, const char *msg) {
306 int fd;
307 char buf[64];
308
309 if ((level&0xff) < server.verbosity ||
310 (server.logfile == NULL && server.daemonize)) return;
311 fd = server.logfile ?
312 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644) :
313 STDOUT_FILENO;
314 if (fd == -1) return;
315 ll2string(buf,sizeof(buf),getpid());
316 if (write(fd,"[",1) == -1) goto err;
317 if (write(fd,buf,strlen(buf)) == -1) goto err;
318 if (write(fd," | signal handler] (",20) == -1) goto err;
319 ll2string(buf,sizeof(buf),time(NULL));
320 if (write(fd,buf,strlen(buf)) == -1) goto err;
321 if (write(fd,") ",2) == -1) goto err;
322 if (write(fd,msg,strlen(msg)) == -1) goto err;
323 if (write(fd,"\n",1) == -1) goto err;
324 err:
325 if (server.logfile) close(fd);
326 }
327
328 /* Redis generally does not try to recover from out of memory conditions
329 * when allocating objects or strings, it is not clear if it will be possible
330 * to report this condition to the client since the networking layer itself
331 * is based on heap allocation for send buffers, so we simply abort.
332 * At least the code will be simpler to read... */
333 void oom(const char *msg) {
334 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
335 sleep(1);
336 abort();
337 }
338
/* Return the current UNIX time, as reported by gettimeofday(), expressed
 * in microseconds. */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
349
/* Return the current UNIX time in milliseconds, derived from ustime(). */
long long mstime(void) {
    long long usec = ustime();

    return usec/1000;
}
354
/* Terminate a child process with exit status 'retcode'.
 *
 * After an RDB dump or AOF rewrite the child leaves through _exit() rather
 * than exit(), because the latter may interact with the same file objects
 * used by the parent process. When building with COVERAGE_TEST defined the
 * normal exit() is used instead, in order to obtain the right coverage
 * information. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
366
367 /*====================== Hash table type implementation ==================== */
368
369 /* This is a hash table type that uses the SDS dynamic strings library as
370 * keys and redis objects as values (objects can hold SDS strings,
371 * lists, sets). */
372
/* Destructor callback: release 'val' with zfree(). 'privdata' is unused. */
void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    zfree(val);
}
378
379 void dictListDestructor(void *privdata, void *val)
380 {
381 DICT_NOTUSED(privdata);
382 listRelease((list*)val);
383 }
384
385 int dictSdsKeyCompare(void *privdata, const void *key1,
386 const void *key2)
387 {
388 int l1,l2;
389 DICT_NOTUSED(privdata);
390
391 l1 = sdslen((sds)key1);
392 l2 = sdslen((sds)key2);
393 if (l1 != l2) return 0;
394 return memcmp(key1, key2, l1) == 0;
395 }
396
/* Case insensitive key comparison, used for the command lookup table.
 * Returns 1 when strcasecmp() reports the two keys as equal, 0 otherwise.
 * 'privdata' is unused. */
int dictSdsKeyCaseCompare(void *privdata, const void *key1,
        const void *key2) {
    DICT_NOTUSED(privdata);

    return !strcasecmp(key1, key2);
}
405
/* Destructor callback for Redis objects: drops one reference from 'val'.
 * A NULL value is tolerated and ignored (values of swapped out keys are
 * set to NULL). 'privdata' is unused. */
void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
413
/* Destructor callback for sds strings: releases 'val' with sdsfree().
 * 'privdata' is unused. */
void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    sdsfree(val);
}
420
421 int dictObjKeyCompare(void *privdata, const void *key1,
422 const void *key2)
423 {
424 const robj *o1 = key1, *o2 = key2;
425 return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
426 }
427
428 unsigned int dictObjHash(const void *key) {
429 const robj *o = key;
430 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
431 }
432
/* Hash callback for sds keys (case sensitive). */
unsigned int dictSdsHash(const void *key)
{
    return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}
436
/* Hash callback for sds keys using the case hash function
 * dictGenCaseHashFunction(). */
unsigned int dictSdsCaseHash(const void *key)
{
    return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key));
}
440
441 int dictEncObjKeyCompare(void *privdata, const void *key1,
442 const void *key2)
443 {
444 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
445 int cmp;
446
447 if (o1->encoding == REDIS_ENCODING_INT &&
448 o2->encoding == REDIS_ENCODING_INT)
449 return o1->ptr == o2->ptr;
450
451 o1 = getDecodedObject(o1);
452 o2 = getDecodedObject(o2);
453 cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
454 decrRefCount(o1);
455 decrRefCount(o2);
456 return cmp;
457 }
458
459 unsigned int dictEncObjHash(const void *key) {
460 robj *o = (robj*) key;
461
462 if (o->encoding == REDIS_ENCODING_RAW) {
463 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
464 } else {
465 if (o->encoding == REDIS_ENCODING_INT) {
466 char buf[32];
467 int len;
468
469 len = ll2string(buf,32,(long)o->ptr);
470 return dictGenHashFunction((unsigned char*)buf, len);
471 } else {
472 unsigned int hash;
473
474 o = getDecodedObject(o);
475 hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
476 decrRefCount(o);
477 return hash;
478 }
479 }
480 }
481
482 /* Sets type hash table */
483 dictType setDictType = {
484 dictEncObjHash, /* hash function */
485 NULL, /* key dup */
486 NULL, /* val dup */
487 dictEncObjKeyCompare, /* key compare */
488 dictRedisObjectDestructor, /* key destructor */
489 NULL /* val destructor */
490 };
491
492 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
493 dictType zsetDictType = {
494 dictEncObjHash, /* hash function */
495 NULL, /* key dup */
496 NULL, /* val dup */
497 dictEncObjKeyCompare, /* key compare */
498 dictRedisObjectDestructor, /* key destructor */
499 NULL /* val destructor */
500 };
501
502 /* Db->dict, keys are sds strings, vals are Redis objects. */
503 dictType dbDictType = {
504 dictSdsHash, /* hash function */
505 NULL, /* key dup */
506 NULL, /* val dup */
507 dictSdsKeyCompare, /* key compare */
508 dictSdsDestructor, /* key destructor */
509 dictRedisObjectDestructor /* val destructor */
510 };
511
512 /* Db->expires */
513 dictType keyptrDictType = {
514 dictSdsHash, /* hash function */
515 NULL, /* key dup */
516 NULL, /* val dup */
517 dictSdsKeyCompare, /* key compare */
518 NULL, /* key destructor */
519 NULL /* val destructor */
520 };
521
522 /* Command table. sds string -> command struct pointer. */
523 dictType commandTableDictType = {
524 dictSdsCaseHash, /* hash function */
525 NULL, /* key dup */
526 NULL, /* val dup */
527 dictSdsKeyCaseCompare, /* key compare */
528 dictSdsDestructor, /* key destructor */
529 NULL /* val destructor */
530 };
531
532 /* Hash type hash table (note that small hashes are represented with zimpaps) */
533 dictType hashDictType = {
534 dictEncObjHash, /* hash function */
535 NULL, /* key dup */
536 NULL, /* val dup */
537 dictEncObjKeyCompare, /* key compare */
538 dictRedisObjectDestructor, /* key destructor */
539 dictRedisObjectDestructor /* val destructor */
540 };
541
542 /* Keylist hash table type has unencoded redis objects as keys and
543 * lists as values. It's used for blocking operations (BLPOP) and to
544 * map swapped keys to a list of clients waiting for this keys to be loaded. */
545 dictType keylistDictType = {
546 dictObjHash, /* hash function */
547 NULL, /* key dup */
548 NULL, /* val dup */
549 dictObjKeyCompare, /* key compare */
550 dictRedisObjectDestructor, /* key destructor */
551 dictListDestructor /* val destructor */
552 };
553
554 int htNeedsResize(dict *dict) {
555 long long size, used;
556
557 size = dictSlots(dict);
558 used = dictSize(dict);
559 return (size && used && size > DICT_HT_INITIAL_SIZE &&
560 (used*100/size < REDIS_HT_MINFILL));
561 }
562
563 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
564 * we resize the hash table to save memory */
565 void tryResizeHashTables(void) {
566 int j;
567
568 for (j = 0; j < server.dbnum; j++) {
569 if (htNeedsResize(server.db[j].dict))
570 dictResize(server.db[j].dict);
571 if (htNeedsResize(server.db[j].expires))
572 dictResize(server.db[j].expires);
573 }
574 }
575
576 /* Our hash table implementation performs rehashing incrementally while
577 * we write/read from the hash table. Still if the server is idle, the hash
578 * table will use two tables for a long time. So we try to use 1 millisecond
579 * of CPU time at every serverCron() loop in order to rehash some key. */
580 void incrementallyRehash(void) {
581 int j;
582
583 for (j = 0; j < server.dbnum; j++) {
584 /* Keys dictionary */
585 if (dictIsRehashing(server.db[j].dict)) {
586 dictRehashMilliseconds(server.db[j].dict,1);
587 break; /* already used our millisecond for this loop... */
588 }
589 /* Expires */
590 if (dictIsRehashing(server.db[j].expires)) {
591 dictRehashMilliseconds(server.db[j].expires,1);
592 break; /* already used our millisecond for this loop... */
593 }
594 }
595 }
596
597 /* This function is called once a background process of some kind terminates,
598 * as we want to avoid resizing the hash tables when there is a child in order
599 * to play well with copy-on-write (otherwise when a resize happens lots of
600 * memory pages are copied). The goal of this function is to update the ability
601 * for dict.c to resize the hash tables accordingly to the fact we have o not
602 * running childs. */
603 void updateDictResizePolicy(void) {
604 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
605 dictEnableResize();
606 else
607 dictDisableResize();
608 }
609
610 /* ======================= Cron: called every 100 ms ======================== */
611
612 /* Try to expire a few timed out keys. The algorithm used is adaptive and
613 * will use few CPU cycles if there are few expiring keys, otherwise
614 * it will get more aggressive to avoid that too much memory is used by
615 * keys that can be removed from the keyspace. */
616 void activeExpireCycle(void) {
617 int j, iteration = 0;
618 long long start = ustime(), timelimit;
619
620 /* We can use at max REDIS_EXPIRELOOKUPS_TIME_PERC percentage of CPU time
621 * per iteration. Since this function gets called with a frequency of
622 * REDIS_HZ times per second, the following is the max amount of
623 * microseconds we can spend in this function. */
624 timelimit = 1000000*REDIS_EXPIRELOOKUPS_TIME_PERC/REDIS_HZ/100;
625 if (timelimit <= 0) timelimit = 1;
626
627 for (j = 0; j < server.dbnum; j++) {
628 int expired;
629 redisDb *db = server.db+j;
630
631 /* Continue to expire if at the end of the cycle more than 25%
632 * of the keys were expired. */
633 do {
634 unsigned long num = dictSize(db->expires);
635 unsigned long slots = dictSlots(db->expires);
636 long long now = mstime();
637
638 /* When there are less than 1% filled slots getting random
639 * keys is expensive, so stop here waiting for better times...
640 * The dictionary will be resized asap. */
641 if (num && slots > DICT_HT_INITIAL_SIZE &&
642 (num*100/slots < 1)) break;
643
644 /* The main collection cycle. Sample random keys among keys
645 * with an expire set, checking for expired ones. */
646 expired = 0;
647 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
648 num = REDIS_EXPIRELOOKUPS_PER_CRON;
649 while (num--) {
650 dictEntry *de;
651 long long t;
652
653 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
654 t = dictGetSignedIntegerVal(de);
655 if (now > t) {
656 sds key = dictGetKey(de);
657 robj *keyobj = createStringObject(key,sdslen(key));
658
659 propagateExpire(db,keyobj);
660 dbDelete(db,keyobj);
661 decrRefCount(keyobj);
662 expired++;
663 server.stat_expiredkeys++;
664 }
665 }
666 /* We can't block forever here even if there are many keys to
667 * expire. So after a given amount of milliseconds return to the
668 * caller waiting for the other active expire cycle. */
669 iteration++;
670 if ((iteration & 0xf) == 0 && /* check once every 16 cycles. */
671 (ustime()-start) > timelimit) return;
672 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
673 }
674 }
675
676 void updateLRUClock(void) {
677 server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) &
678 REDIS_LRU_CLOCK_MAX;
679 }
680
681
682 /* Add a sample to the operations per second array of samples. */
683 void trackOperationsPerSecond(void) {
684 long long t = mstime() - server.ops_sec_last_sample_time;
685 long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
686 long long ops_sec;
687
688 ops_sec = t > 0 ? (ops*1000/t) : 0;
689
690 server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
691 server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
692 server.ops_sec_last_sample_time = mstime();
693 server.ops_sec_last_sample_ops = server.stat_numcommands;
694 }
695
696 /* Return the mean of all the samples. */
697 long long getOperationsPerSecond(void) {
698 int j;
699 long long sum = 0;
700
701 for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
702 sum += server.ops_sec_samples[j];
703 return sum / REDIS_OPS_SEC_SAMPLES;
704 }
705
706 /* Check for timeouts. Returns non-zero if the client was terminated */
707 int clientsCronHandleTimeout(redisClient *c) {
708 time_t now = server.unixtime;
709
710 if (server.maxidletime &&
711 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
712 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
713 !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
714 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
715 listLength(c->pubsub_patterns) == 0 &&
716 (now - c->lastinteraction > server.maxidletime))
717 {
718 redisLog(REDIS_VERBOSE,"Closing idle client");
719 freeClient(c);
720 return 1;
721 } else if (c->flags & REDIS_BLOCKED) {
722 if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
723 addReply(c,shared.nullmultibulk);
724 unblockClientWaitingData(c);
725 }
726 }
727 return 0;
728 }
729
730 /* The client query buffer is an sds.c string that can end with a lot of
731 * free space not used, this function reclaims space if needed.
732 *
733 * The funciton always returns 0 as it never terminates the client. */
734 int clientsCronResizeQueryBuffer(redisClient *c) {
735 size_t querybuf_size = sdsAllocSize(c->querybuf);
736 time_t idletime = server.unixtime - c->lastinteraction;
737
738 /* There are two conditions to resize the query buffer:
739 * 1) Query buffer is > BIG_ARG and too big for latest peak.
740 * 2) Client is inactive and the buffer is bigger than 1k. */
741 if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
742 (querybuf_size/(c->querybuf_peak+1)) > 2) ||
743 (querybuf_size > 1024 && idletime > 2))
744 {
745 /* Only resize the query buffer if it is actually wasting space. */
746 if (sdsavail(c->querybuf) > 1024) {
747 c->querybuf = sdsRemoveFreeSpace(c->querybuf);
748 }
749 }
750 /* Reset the peak again to capture the peak memory usage in the next
751 * cycle. */
752 c->querybuf_peak = 0;
753 return 0;
754 }
755
756 void clientsCron(void) {
757 /* Make sure to process at least 1/(REDIS_HZ*10) of clients per call.
758 * Since this function is called REDIS_HZ times per second we are sure that
759 * in the worst case we process all the clients in 10 seconds.
760 * In normal conditions (a reasonable number of clients) we process
761 * all the clients in a shorter time. */
762 int numclients = listLength(server.clients);
763 int iterations = numclients/(REDIS_HZ*10);
764
765 if (iterations < 50)
766 iterations = (numclients < 50) ? numclients : 50;
767 while(listLength(server.clients) && iterations--) {
768 redisClient *c;
769 listNode *head;
770
771 /* Rotate the list, take the current head, process.
772 * This way if the client must be removed from the list it's the
773 * first element and we don't incur into O(N) computation. */
774 listRotate(server.clients);
775 head = listFirst(server.clients);
776 c = listNodeValue(head);
777 /* The following functions do different service checks on the client.
778 * The protocol is that they return non-zero if the client was
779 * terminated. */
780 if (clientsCronHandleTimeout(c)) continue;
781 if (clientsCronResizeQueryBuffer(c)) continue;
782 }
783 }
784
785 /* This is our timer interrupt, called REDIS_HZ times per second.
786 * Here is where we do a number of things that need to be done asynchronously.
787 * For instance:
788 *
789 * - Active expired keys collection (it is also performed in a lazy way on
790 * lookup).
791 * - Software watchdog.
792 * - Update some statistic.
793 * - Incremental rehashing of the DBs hash tables.
794 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
795 * - Clients timeout of different kinds.
796 * - Replication reconnection.
797 * - Many more...
798 *
799 * Everything directly called here will be called REDIS_HZ times per second,
800 * so in order to throttle execution of things we want to do less frequently
801 * a macro is used: run_with_period(milliseconds) { .... }
802 */
803
/* Using the following macro you can run code inside serverCron() with the
 * specified period, specified in milliseconds:
 *
 *     run_with_period(100) { ... }
 *
 * The macro expands to an 'if' statement and relies on a variable named
 * 'loops' (the cron loops counter) being in scope at the point of use.
 *
 * The actual resolution depends on REDIS_HZ: the cron tick lasts
 * 1000/REDIS_HZ milliseconds. A period shorter than (or equal to) one tick
 * cannot be honored more precisely than "every call", so in that case the
 * code always runs. This also guards the modulo below, whose divisor
 * (_ms_)/(1000/REDIS_HZ) would otherwise be zero for sub-tick periods. */
#define run_with_period(_ms_) \
    if (((_ms_) <= 1000/REDIS_HZ) || !(loops % ((_ms_)/(1000/REDIS_HZ))))
808
809 int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
810 int j, loops = server.cronloops;
811 REDIS_NOTUSED(eventLoop);
812 REDIS_NOTUSED(id);
813 REDIS_NOTUSED(clientData);
814
815 /* Software watchdog: deliver the SIGALRM that will reach the signal
816 * handler if we don't return here fast enough. */
817 if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
818
819 /* We take a cached value of the unix time in the global state because
820 * with virtual memory and aging there is to store the current time
821 * in objects at every object access, and accuracy is not needed.
822 * To access a global var is faster than calling time(NULL) */
823 server.unixtime = time(NULL);
824
825 run_with_period(100) trackOperationsPerSecond();
826
827 /* We have just 22 bits per object for LRU information.
828 * So we use an (eventually wrapping) LRU clock with 10 seconds resolution.
829 * 2^22 bits with 10 seconds resoluton is more or less 1.5 years.
830 *
831 * Note that even if this will wrap after 1.5 years it's not a problem,
832 * everything will still work but just some object will appear younger
833 * to Redis. But for this to happen a given object should never be touched
834 * for 1.5 years.
835 *
836 * Note that you can change the resolution altering the
837 * REDIS_LRU_CLOCK_RESOLUTION define.
838 */
839 updateLRUClock();
840
841 /* Record the max memory used since the server was started. */
842 if (zmalloc_used_memory() > server.stat_peak_memory)
843 server.stat_peak_memory = zmalloc_used_memory();
844
845 /* We received a SIGTERM, shutting down here in a safe way, as it is
846 * not ok doing so inside the signal handler. */
847 if (server.shutdown_asap) {
848 if (prepareForShutdown(0) == REDIS_OK) exit(0);
849 redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
850 }
851
852 /* Show some info about non-empty databases */
853 run_with_period(5000) {
854 for (j = 0; j < server.dbnum; j++) {
855 long long size, used, vkeys;
856
857 size = dictSlots(server.db[j].dict);
858 used = dictSize(server.db[j].dict);
859 vkeys = dictSize(server.db[j].expires);
860 if (used || vkeys) {
861 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
862 /* dictPrintStats(server.dict); */
863 }
864 }
865 }
866
867 /* We don't want to resize the hash tables while a bacground saving
868 * is in progress: the saving child is created using fork() that is
869 * implemented with a copy-on-write semantic in most modern systems, so
870 * if we resize the HT while there is the saving child at work actually
871 * a lot of memory movements in the parent will cause a lot of pages
872 * copied. */
873 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
874 tryResizeHashTables();
875 if (server.activerehashing) incrementallyRehash();
876 }
877
878 /* Show information about connected clients */
879 run_with_period(5000) {
880 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
881 listLength(server.clients)-listLength(server.slaves),
882 listLength(server.slaves),
883 zmalloc_used_memory());
884 }
885
886 /* We need to do a few operations on clients asynchronously. */
887 clientsCron();
888
889 /* Start a scheduled AOF rewrite if this was requested by the user while
890 * a BGSAVE was in progress. */
891 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
892 server.aof_rewrite_scheduled)
893 {
894 rewriteAppendOnlyFileBackground();
895 }
896
897 /* Check if a background saving or AOF rewrite in progress terminated. */
898 if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
899 int statloc;
900 pid_t pid;
901
902 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
903 int exitcode = WEXITSTATUS(statloc);
904 int bysignal = 0;
905
906 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
907
908 if (pid == server.rdb_child_pid) {
909 backgroundSaveDoneHandler(exitcode,bysignal);
910 } else {
911 backgroundRewriteDoneHandler(exitcode,bysignal);
912 }
913 updateDictResizePolicy();
914 }
915 } else {
916 /* If there is not a background saving/rewrite in progress check if
917 * we have to save/rewrite now */
918 for (j = 0; j < server.saveparamslen; j++) {
919 struct saveparam *sp = server.saveparams+j;
920
921 if (server.dirty >= sp->changes &&
922 server.unixtime-server.lastsave > sp->seconds) {
923 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
924 sp->changes, sp->seconds);
925 rdbSaveBackground(server.rdb_filename);
926 break;
927 }
928 }
929
930 /* Trigger an AOF rewrite if needed */
931 if (server.rdb_child_pid == -1 &&
932 server.aof_child_pid == -1 &&
933 server.aof_rewrite_perc &&
934 server.aof_current_size > server.aof_rewrite_min_size)
935 {
936 long long base = server.aof_rewrite_base_size ?
937 server.aof_rewrite_base_size : 1;
938 long long growth = (server.aof_current_size*100/base) - 100;
939 if (growth >= server.aof_rewrite_perc) {
940 redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
941 rewriteAppendOnlyFileBackground();
942 }
943 }
944 }
945
946
947 /* If we postponed an AOF buffer flush, let's try to do it every time the
948 * cron function is called. */
949 if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
950
951 /* Expire a few keys per cycle, only if this is a master.
952 * On slaves we wait for DEL operations synthesized by the master
953 * in order to guarantee a strict consistency. */
954 if (server.masterhost == NULL) activeExpireCycle();
955
956 /* Close clients that need to be closed asynchronous */
957 freeClientsInAsyncFreeQueue();
958
959 /* Replication cron function -- used to reconnect to master and
960 * to detect transfer failures. */
961 run_with_period(1000) replicationCron();
962
963 server.cronloops++;
964 return 1000/REDIS_HZ;
965 }
966
967 /* This function gets called every time Redis is entering the
968 * main loop of the event driven library, that is, before to sleep
969 * for ready file descriptors. */
970 void beforeSleep(struct aeEventLoop *eventLoop) {
971 REDIS_NOTUSED(eventLoop);
972 listNode *ln;
973 redisClient *c;
974
975 /* Try to process pending commands for clients that were just unblocked. */
976 while (listLength(server.unblocked_clients)) {
977 ln = listFirst(server.unblocked_clients);
978 redisAssert(ln != NULL);
979 c = ln->value;
980 listDelNode(server.unblocked_clients,ln);
981 c->flags &= ~REDIS_UNBLOCKED;
982
983 /* Process remaining data in the input buffer. */
984 if (c->querybuf && sdslen(c->querybuf) > 0) {
985 server.current_client = c;
986 processInputBuffer(c);
987 server.current_client = NULL;
988 }
989 }
990
991 /* Write the AOF buffer on disk */
992 flushAppendOnlyFile(0);
993 }
994
995 /* =========================== Server initialization ======================== */
996
997 void createSharedObjects(void) {
998 int j;
999
1000 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1001 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1002 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
1003 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1004 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1005 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1006 shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
1007 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1008 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1009 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
1010 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1011 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
1012 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1013 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1014 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1015 "-ERR no such key\r\n"));
1016 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1017 "-ERR syntax error\r\n"));
1018 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1019 "-ERR source and destination objects are the same\r\n"));
1020 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1021 "-ERR index out of range\r\n"));
1022 shared.noscripterr = createObject(REDIS_STRING,sdsnew(
1023 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
1024 shared.loadingerr = createObject(REDIS_STRING,sdsnew(
1025 "-LOADING Redis is loading the dataset in memory\r\n"));
1026 shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
1027 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
1028 shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
1029 "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
1030 shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
1031 "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
1032 shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
1033 "-READONLY You can't write against a read only slave.\r\n"));
1034 shared.oomerr = createObject(REDIS_STRING,sdsnew(
1035 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
1036 shared.space = createObject(REDIS_STRING,sdsnew(" "));
1037 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1038 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1039
1040 for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
1041 shared.select[j] = createObject(REDIS_STRING,
1042 sdscatprintf(sdsempty(),"select %d\r\n", j));
1043 }
1044 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1045 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1046 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1047 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1048 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1049 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1050 shared.del = createStringObject("DEL",3);
1051 shared.rpop = createStringObject("RPOP",4);
1052 shared.lpop = createStringObject("LPOP",4);
1053 for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
1054 shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
1055 shared.integers[j]->encoding = REDIS_ENCODING_INT;
1056 }
1057 for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
1058 shared.mbulkhdr[j] = createObject(REDIS_STRING,
1059 sdscatprintf(sdsempty(),"*%d\r\n",j));
1060 shared.bulkhdr[j] = createObject(REDIS_STRING,
1061 sdscatprintf(sdsempty(),"$%d\r\n",j));
1062 }
1063 }
1064
1065 void initServerConfig() {
1066 getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
1067 server.runid[REDIS_RUN_ID_SIZE] = '\0';
1068 server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
1069 server.port = REDIS_SERVERPORT;
1070 server.bindaddr = NULL;
1071 server.unixsocket = NULL;
1072 server.unixsocketperm = 0;
1073 server.ipfd = -1;
1074 server.sofd = -1;
1075 server.dbnum = REDIS_DEFAULT_DBNUM;
1076 server.verbosity = REDIS_NOTICE;
1077 server.maxidletime = REDIS_MAXIDLETIME;
1078 server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
1079 server.saveparams = NULL;
1080 server.loading = 0;
1081 server.logfile = NULL; /* NULL = log on standard output */
1082 server.syslog_enabled = 0;
1083 server.syslog_ident = zstrdup("redis");
1084 server.syslog_facility = LOG_LOCAL0;
1085 server.daemonize = 0;
1086 server.aof_state = REDIS_AOF_OFF;
1087 server.aof_fsync = AOF_FSYNC_EVERYSEC;
1088 server.aof_no_fsync_on_rewrite = 0;
1089 server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
1090 server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
1091 server.aof_rewrite_base_size = 0;
1092 server.aof_rewrite_scheduled = 0;
1093 server.aof_last_fsync = time(NULL);
1094 server.aof_delayed_fsync = 0;
1095 server.aof_fd = -1;
1096 server.aof_selected_db = -1; /* Make sure the first time will not match */
1097 server.aof_flush_postponed_start = 0;
1098 server.pidfile = zstrdup("/var/run/redis.pid");
1099 server.rdb_filename = zstrdup("dump.rdb");
1100 server.aof_filename = zstrdup("appendonly.aof");
1101 server.requirepass = NULL;
1102 server.rdb_compression = 1;
1103 server.rdb_checksum = 1;
1104 server.activerehashing = 1;
1105 server.maxclients = REDIS_MAX_CLIENTS;
1106 server.bpop_blocked_clients = 0;
1107 server.maxmemory = 0;
1108 server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
1109 server.maxmemory_samples = 3;
1110 server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
1111 server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
1112 server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
1113 server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
1114 server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
1115 server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
1116 server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
1117 server.shutdown_asap = 0;
1118 server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
1119 server.repl_timeout = REDIS_REPL_TIMEOUT;
1120 server.lua_caller = NULL;
1121 server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
1122 server.lua_client = NULL;
1123 server.lua_timedout = 0;
1124
1125 updateLRUClock();
1126 resetServerSaveParams();
1127
1128 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1129 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1130 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1131 /* Replication related */
1132 server.masterauth = NULL;
1133 server.masterhost = NULL;
1134 server.masterport = 6379;
1135 server.master = NULL;
1136 server.repl_state = REDIS_REPL_NONE;
1137 server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
1138 server.repl_serve_stale_data = 1;
1139 server.repl_slave_ro = 1;
1140 server.repl_down_since = time(NULL);
1141
1142 /* Client output buffer limits */
1143 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
1144 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_bytes = 0;
1145 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_seconds = 0;
1146 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].hard_limit_bytes = 1024*1024*256;
1147 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_bytes = 1024*1024*64;
1148 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_seconds = 60;
1149 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].hard_limit_bytes = 1024*1024*32;
1150 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_bytes = 1024*1024*8;
1151 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_seconds = 60;
1152
1153 /* Double constants initialization */
1154 R_Zero = 0.0;
1155 R_PosInf = 1.0/R_Zero;
1156 R_NegInf = -1.0/R_Zero;
1157 R_Nan = R_Zero/R_Zero;
1158
1159 /* Command table -- we intiialize it here as it is part of the
1160 * initial configuration, since command names may be changed via
1161 * redis.conf using the rename-command directive. */
1162 server.commands = dictCreate(&commandTableDictType,NULL);
1163 populateCommandTable();
1164 server.delCommand = lookupCommandByCString("del");
1165 server.multiCommand = lookupCommandByCString("multi");
1166 server.lpushCommand = lookupCommandByCString("lpush");
1167
1168 /* Slow log */
1169 server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
1170 server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
1171
1172 /* Debugging */
1173 server.assert_failed = "<no assertion failed>";
1174 server.assert_file = "<no file>";
1175 server.assert_line = 0;
1176 server.bug_report_start = 0;
1177 server.watchdog_period = 0;
1178 }
1179
1180 /* This function will try to raise the max number of open files accordingly to
1181 * the configured max number of clients. It will also account for 32 additional
1182 * file descriptors as we need a few more for persistence, listening
1183 * sockets, log files and so forth.
1184 *
1185 * If it will not be possible to set the limit accordingly to the configured
1186 * max number of clients, the function will do the reverse setting
1187 * server.maxclients to the value that we can actually handle. */
1188 void adjustOpenFilesLimit(void) {
1189 rlim_t maxfiles = server.maxclients+32;
1190 struct rlimit limit;
1191
1192 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
1193 redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
1194 strerror(errno));
1195 server.maxclients = 1024-32;
1196 } else {
1197 rlim_t oldlimit = limit.rlim_cur;
1198
1199 /* Set the max number of files if the current limit is not enough
1200 * for our needs. */
1201 if (oldlimit < maxfiles) {
1202 rlim_t f;
1203
1204 f = maxfiles;
1205 while(f > oldlimit) {
1206 limit.rlim_cur = f;
1207 limit.rlim_max = f;
1208 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
1209 f -= 128;
1210 }
1211 if (f < oldlimit) f = oldlimit;
1212 if (f != maxfiles) {
1213 server.maxclients = f-32;
1214 redisLog(REDIS_WARNING,"Unable to set the max number of files limit to %d (%s), setting the max clients configuration to %d.",
1215 (int) maxfiles, strerror(errno), (int) server.maxclients);
1216 } else {
1217 redisLog(REDIS_NOTICE,"Max number of open files set to %d",
1218 (int) maxfiles);
1219 }
1220 }
1221 }
1222 }
1223
1224 void initServer() {
1225 int j;
1226
1227 signal(SIGHUP, SIG_IGN);
1228 signal(SIGPIPE, SIG_IGN);
1229 setupSignalHandlers();
1230
1231 if (server.syslog_enabled) {
1232 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
1233 server.syslog_facility);
1234 }
1235
1236 server.current_client = NULL;
1237 server.clients = listCreate();
1238 server.clients_to_close = listCreate();
1239 server.slaves = listCreate();
1240 server.monitors = listCreate();
1241 server.unblocked_clients = listCreate();
1242
1243 createSharedObjects();
1244 adjustOpenFilesLimit();
1245 server.el = aeCreateEventLoop(server.maxclients+1024);
1246 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1247
1248 if (server.port != 0) {
1249 server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr);
1250 if (server.ipfd == ANET_ERR) {
1251 redisLog(REDIS_WARNING, "Opening port %d: %s",
1252 server.port, server.neterr);
1253 exit(1);
1254 }
1255 }
1256 if (server.unixsocket != NULL) {
1257 unlink(server.unixsocket); /* don't care if this fails */
1258 server.sofd = anetUnixServer(server.neterr,server.unixsocket,server.unixsocketperm);
1259 if (server.sofd == ANET_ERR) {
1260 redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
1261 exit(1);
1262 }
1263 }
1264 if (server.ipfd < 0 && server.sofd < 0) {
1265 redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
1266 exit(1);
1267 }
1268 for (j = 0; j < server.dbnum; j++) {
1269 server.db[j].dict = dictCreate(&dbDictType,NULL);
1270 server.db[j].expires = dictCreate(&keyptrDictType,NULL);
1271 server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1272 server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
1273 server.db[j].id = j;
1274 }
1275 server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1276 server.pubsub_patterns = listCreate();
1277 listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1278 listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
1279 server.cronloops = 0;
1280 server.rdb_child_pid = -1;
1281 server.aof_child_pid = -1;
1282 server.aof_rewrite_buf = sdsempty();
1283 server.aof_buf = sdsempty();
1284 server.lastsave = time(NULL);
1285 server.dirty = 0;
1286 server.stat_numcommands = 0;
1287 server.stat_numconnections = 0;
1288 server.stat_expiredkeys = 0;
1289 server.stat_evictedkeys = 0;
1290 server.stat_starttime = time(NULL);
1291 server.stat_keyspace_misses = 0;
1292 server.stat_keyspace_hits = 0;
1293 server.stat_peak_memory = 0;
1294 server.stat_fork_time = 0;
1295 server.stat_rejected_conn = 0;
1296 memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
1297 server.ops_sec_idx = 0;
1298 server.ops_sec_last_sample_time = mstime();
1299 server.ops_sec_last_sample_ops = 0;
1300 server.unixtime = time(NULL);
1301 server.lastbgsave_status = REDIS_OK;
1302 server.stop_writes_on_bgsave_err = 1;
1303 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1304 if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE,
1305 acceptTcpHandler,NULL) == AE_ERR) oom("creating file event");
1306 if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
1307 acceptUnixHandler,NULL) == AE_ERR) oom("creating file event");
1308
1309 if (server.aof_state == REDIS_AOF_ON) {
1310 server.aof_fd = open(server.aof_filename,
1311 O_WRONLY|O_APPEND|O_CREAT,0644);
1312 if (server.aof_fd == -1) {
1313 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1314 strerror(errno));
1315 exit(1);
1316 }
1317 }
1318
1319 /* 32 bit instances are limited to 4GB of address space, so if there is
1320 * no explicit limit in the user provided configuration we set a limit
1321 * at 3.5GB using maxmemory with 'noeviction' policy'. This saves
1322 * useless crashes of the Redis instance. */
1323 if (server.arch_bits == 32 && server.maxmemory == 0) {
1324 redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3.5 GB maxmemory limit with 'noeviction' policy now.");
1325 server.maxmemory = 3584LL*(1024*1024); /* 3584 MB = 3.5 GB */
1326 server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
1327 }
1328
1329 scriptingInit();
1330 slowlogInit();
1331 bioInit();
1332 }
1333
1334 /* Populates the Redis Command Table starting from the hard coded list
1335 * we have on top of redis.c file. */
1336 void populateCommandTable(void) {
1337 int j;
1338 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1339
1340 for (j = 0; j < numcommands; j++) {
1341 struct redisCommand *c = redisCommandTable+j;
1342 char *f = c->sflags;
1343 int retval;
1344
1345 while(*f != '\0') {
1346 switch(*f) {
1347 case 'w': c->flags |= REDIS_CMD_WRITE; break;
1348 case 'r': c->flags |= REDIS_CMD_READONLY; break;
1349 case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
1350 case 'a': c->flags |= REDIS_CMD_ADMIN; break;
1351 case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
1352 case 'f': c->flags |= REDIS_CMD_FORCE_REPLICATION; break;
1353 case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
1354 case 'R': c->flags |= REDIS_CMD_RANDOM; break;
1355 case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
1356 default: redisPanic("Unsupported command flag"); break;
1357 }
1358 f++;
1359 }
1360
1361 retval = dictAdd(server.commands, sdsnew(c->name), c);
1362 assert(retval == DICT_OK);
1363 }
1364 }
1365
1366 void resetCommandTableStats(void) {
1367 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1368 int j;
1369
1370 for (j = 0; j < numcommands; j++) {
1371 struct redisCommand *c = redisCommandTable+j;
1372
1373 c->microseconds = 0;
1374 c->calls = 0;
1375 }
1376 }
1377
1378 /* ========================== Redis OP Array API ============================ */
1379
1380 void redisOpArrayInit(redisOpArray *oa) {
1381 oa->ops = NULL;
1382 oa->numops = 0;
1383 }
1384
1385 int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid,
1386 robj **argv, int argc, int target)
1387 {
1388 redisOp *op;
1389
1390 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1));
1391 op = oa->ops+oa->numops;
1392 op->cmd = cmd;
1393 op->dbid = dbid;
1394 op->argv = argv;
1395 op->argc = argc;
1396 op->target = target;
1397 oa->numops++;
1398 return oa->numops;
1399 }
1400
1401 void redisOpArrayFree(redisOpArray *oa) {
1402 while(oa->numops) {
1403 int j;
1404 redisOp *op;
1405
1406 oa->numops--;
1407 op = oa->ops+oa->numops;
1408 for (j = 0; j < op->argc; j++)
1409 decrRefCount(op->argv[j]);
1410 zfree(op->argv);
1411 }
1412 zfree(oa->ops);
1413 }
1414
1415 /* ====================== Commands lookup and execution ===================== */
1416
1417 struct redisCommand *lookupCommand(sds name) {
1418 return dictFetchValue(server.commands, name);
1419 }
1420
1421 struct redisCommand *lookupCommandByCString(char *s) {
1422 struct redisCommand *cmd;
1423 sds name = sdsnew(s);
1424
1425 cmd = dictFetchValue(server.commands, name);
1426 sdsfree(name);
1427 return cmd;
1428 }
1429
1430 /* Propagate the specified command (in the context of the specified database id)
1431 * to AOF, Slaves and Monitors.
1432 *
1433 * flags are an xor between:
1434 * + REDIS_PROPAGATE_NONE (no propagation of command at all)
1435 * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
1436 * + REDIS_PROPAGATE_REPL (propagate into the replication link)
1437 */
1438 void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1439 int flags)
1440 {
1441 if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
1442 feedAppendOnlyFile(cmd,dbid,argv,argc);
1443 if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves))
1444 replicationFeedSlaves(server.slaves,dbid,argv,argc);
1445 }
1446
1447 /* Used inside commands to schedule the propagation of additional commands
1448 * after the current command is propagated to AOF / Replication. */
1449 void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1450 int target)
1451 {
1452 redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
1453 }
1454
1455 /* Call() is the core of Redis execution of a command */
1456 void call(redisClient *c, int flags) {
1457 long long dirty, start = ustime(), duration;
1458
1459 /* Sent the command to clients in MONITOR mode, only if the commands are
1460 * not geneated from reading an AOF. */
1461 if (listLength(server.monitors) && !server.loading)
1462 replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
1463
1464 /* Call the command. */
1465 redisOpArrayInit(&server.also_propagate);
1466 dirty = server.dirty;
1467 c->cmd->proc(c);
1468 dirty = server.dirty-dirty;
1469 duration = ustime()-start;
1470
1471 /* When EVAL is called loading the AOF we don't want commands called
1472 * from Lua to go into the slowlog or to populate statistics. */
1473 if (server.loading && c->flags & REDIS_LUA_CLIENT)
1474 flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
1475
1476 /* Log the command into the Slow log if needed, and populate the
1477 * per-command statistics that we show in INFO commandstats. */
1478 if (flags & REDIS_CALL_SLOWLOG)
1479 slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
1480 if (flags & REDIS_CALL_STATS) {
1481 c->cmd->microseconds += duration;
1482 c->cmd->calls++;
1483 }
1484
1485 /* Propagate the command into the AOF and replication link */
1486 if (flags & REDIS_CALL_PROPAGATE) {
1487 int flags = REDIS_PROPAGATE_NONE;
1488
1489 if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION)
1490 flags |= REDIS_PROPAGATE_REPL;
1491 if (dirty)
1492 flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
1493 if (flags != REDIS_PROPAGATE_NONE)
1494 propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
1495 }
1496 /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional
1497 * PUSH command. */
1498 if (server.also_propagate.numops) {
1499 int j;
1500 redisOp *rop;
1501
1502 for (j = 0; j < server.also_propagate.numops; j++) {
1503 rop = &server.also_propagate.ops[j];
1504 propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
1505 }
1506 redisOpArrayFree(&server.also_propagate);
1507 }
1508 server.stat_numcommands++;
1509 }
1510
1511 /* If this function gets called we already read a whole
1512 * command, argments are in the client argv/argc fields.
1513 * processCommand() execute the command or prepare the
1514 * server for a bulk read from the client.
1515 *
1516 * If 1 is returned the client is still alive and valid and
1517 * and other operations can be performed by the caller. Otherwise
1518 * if 0 is returned the client was destroied (i.e. after QUIT). */
1519 int processCommand(redisClient *c) {
1520 /* The QUIT command is handled separately. Normal command procs will
1521 * go through checking for replication and QUIT will cause trouble
1522 * when FORCE_REPLICATION is enabled and would be implemented in
1523 * a regular command proc. */
1524 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1525 addReply(c,shared.ok);
1526 c->flags |= REDIS_CLOSE_AFTER_REPLY;
1527 return REDIS_ERR;
1528 }
1529
1530 /* Now lookup the command and check ASAP about trivial error conditions
1531 * such as wrong arity, bad command name and so forth. */
1532 c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
1533 if (!c->cmd) {
1534 addReplyErrorFormat(c,"unknown command '%s'",
1535 (char*)c->argv[0]->ptr);
1536 return REDIS_OK;
1537 } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
1538 (c->argc < -c->cmd->arity)) {
1539 addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
1540 c->cmd->name);
1541 return REDIS_OK;
1542 }
1543
1544 /* Check if the user is authenticated */
1545 if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
1546 {
1547 addReplyError(c,"operation not permitted");
1548 return REDIS_OK;
1549 }
1550
1551 /* Handle the maxmemory directive.
1552 *
1553 * First we try to free some memory if possible (if there are volatile
1554 * keys in the dataset). If there are not the only thing we can do
1555 * is returning an error. */
1556 if (server.maxmemory) {
1557 int retval = freeMemoryIfNeeded();
1558 if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
1559 addReply(c, shared.oomerr);
1560 return REDIS_OK;
1561 }
1562 }
1563
1564 /* Don't accept write commands if there are problems persisting on disk. */
1565 if (server.stop_writes_on_bgsave_err &&
1566 server.saveparamslen > 0
1567 && server.lastbgsave_status == REDIS_ERR &&
1568 c->cmd->flags & REDIS_CMD_WRITE)
1569 {
1570 addReply(c, shared.bgsaveerr);
1571 return REDIS_OK;
1572 }
1573
1574 /* Don't accept wirte commands if this is a read only slave. But
1575 * accept write commands if this is our master. */
1576 if (server.masterhost && server.repl_slave_ro &&
1577 !(c->flags & REDIS_MASTER) &&
1578 c->cmd->flags & REDIS_CMD_WRITE)
1579 {
1580 addReply(c, shared.roslaveerr);
1581 return REDIS_OK;
1582 }
1583
1584 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
1585 if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
1586 &&
1587 c->cmd->proc != subscribeCommand &&
1588 c->cmd->proc != unsubscribeCommand &&
1589 c->cmd->proc != psubscribeCommand &&
1590 c->cmd->proc != punsubscribeCommand) {
1591 addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
1592 return REDIS_OK;
1593 }
1594
1595 /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
1596 * we are a slave with a broken link with master. */
1597 if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
1598 server.repl_serve_stale_data == 0 &&
1599 c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
1600 {
1601 addReply(c, shared.masterdownerr);
1602 return REDIS_OK;
1603 }
1604
1605 /* Loading DB? Return an error if the command is not INFO */
1606 if (server.loading && c->cmd->proc != infoCommand) {
1607 addReply(c, shared.loadingerr);
1608 return REDIS_OK;
1609 }
1610
1611 /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. */
1612 if (server.lua_timedout &&
1613 !(c->cmd->proc == shutdownCommand &&
1614 c->argc == 2 &&
1615 tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
1616 !(c->cmd->proc == scriptCommand &&
1617 c->argc == 2 &&
1618 tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
1619 {
1620 addReply(c, shared.slowscripterr);
1621 return REDIS_OK;
1622 }
1623
1624 /* Exec the command */
1625 if (c->flags & REDIS_MULTI &&
1626 c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
1627 c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
1628 {
1629 queueMultiCommand(c);
1630 addReply(c,shared.queued);
1631 } else {
1632 call(c,REDIS_CALL_FULL);
1633 }
1634 return REDIS_OK;
1635 }
1636
1637 /*================================== Shutdown =============================== */
1638
1639 int prepareForShutdown(int flags) {
1640 int save = flags & REDIS_SHUTDOWN_SAVE;
1641 int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
1642
1643 redisLog(REDIS_WARNING,"User requested shutdown...");
1644 /* Kill the saving child if there is a background saving in progress.
1645 We want to avoid race conditions, for instance our saving child may
1646 overwrite the synchronous saving did by SHUTDOWN. */
1647 if (server.rdb_child_pid != -1) {
1648 redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
1649 kill(server.rdb_child_pid,SIGKILL);
1650 rdbRemoveTempFile(server.rdb_child_pid);
1651 }
1652 if (server.aof_state != REDIS_AOF_OFF) {
1653 /* Kill the AOF saving child as the AOF we already have may be longer
1654 * but contains the full dataset anyway. */
1655 if (server.aof_child_pid != -1) {
1656 redisLog(REDIS_WARNING,
1657 "There is a child rewriting the AOF. Killing it!");
1658 kill(server.aof_child_pid,SIGKILL);
1659 }
1660 /* Append only file: fsync() the AOF and exit */
1661 redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
1662 aof_fsync(server.aof_fd);
1663 }
1664 if ((server.saveparamslen > 0 && !nosave) || save) {
1665 redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
1666 /* Snapshotting. Perform a SYNC SAVE and exit */
1667 if (rdbSave(server.rdb_filename) != REDIS_OK) {
1668 /* Ooops.. error saving! The best we can do is to continue
1669 * operating. Note that if there was a background saving process,
1670 * in the next cron() Redis will be notified that the background
1671 * saving aborted, handling special stuff like slaves pending for
1672 * synchronization... */
1673 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
1674 return REDIS_ERR;
1675 }
1676 }
1677 if (server.daemonize) {
1678 redisLog(REDIS_NOTICE,"Removing the pid file.");
1679 unlink(server.pidfile);
1680 }
1681 /* Close the listening sockets. Apparently this allows faster restarts. */
1682 if (server.ipfd != -1) close(server.ipfd);
1683 if (server.sofd != -1) close(server.sofd);
1684 if (server.unixsocket) {
1685 redisLog(REDIS_NOTICE,"Removing the unix socket file.");
1686 unlink(server.unixsocket); /* don't care if this fails */
1687 }
1688
1689 redisLog(REDIS_WARNING,"Redis is now ready to exit, bye bye...");
1690 return REDIS_OK;
1691 }
1692
1693 /*================================== Commands =============================== */
1694
1695 void authCommand(redisClient *c) {
1696 if (!server.requirepass) {
1697 addReplyError(c,"Client sent AUTH, but no password is set");
1698 } else if (!strcmp(c->argv[1]->ptr, server.requirepass)) {
1699 c->authenticated = 1;
1700 addReply(c,shared.ok);
1701 } else {
1702 c->authenticated = 0;
1703 addReplyError(c,"invalid password");
1704 }
1705 }
1706
1707 void pingCommand(redisClient *c) {
1708 addReply(c,shared.pong);
1709 }
1710
1711 void echoCommand(redisClient *c) {
1712 addReplyBulk(c,c->argv[1]);
1713 }
1714
1715 void timeCommand(redisClient *c) {
1716 struct timeval tv;
1717
1718 /* gettimeofday() can only fail if &tv is a bad addresss so we
1719 * don't check for errors. */
1720 gettimeofday(&tv,NULL);
1721 addReplyMultiBulkLen(c,2);
1722 addReplyBulkLongLong(c,tv.tv_sec);
1723 addReplyBulkLongLong(c,tv.tv_usec);
1724 }
1725
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 4.00K, 100.00M, 2.00G, 1.50T, 3.00P and so forth.
 *
 * 's' must point to a buffer large enough for the result (the longest
 * output is a 20 digit number followed by 'B' and the terminator).
 * 'n' is the number of bytes to format.
 *
 * Values below 1024 are printed as an integer number of bytes; larger
 * values use two decimals with a K/M/G/T/P suffix (powers of 1024). Values
 * too large for the biggest unit fall back to a plain byte count, so the
 * output buffer is always written. */
void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too big for the largest unit: report the raw byte count. */
        sprintf(s,"%lluB",n);
    }
}
1746
1747 /* Create the string returned by the INFO command. This is decoupled
1748 * by the INFO command itself as we need to report the same information
1749 * on memory corruption problems. */
1750 sds genRedisInfoString(char *section) {
1751 sds info = sdsempty();
1752 time_t uptime = server.unixtime-server.stat_starttime;
1753 int j, numcommands;
1754 struct rusage self_ru, c_ru;
1755 unsigned long lol, bib;
1756 int allsections = 0, defsections = 0;
1757 int sections = 0;
1758
1759 if (section) {
1760 allsections = strcasecmp(section,"all") == 0;
1761 defsections = strcasecmp(section,"default") == 0;
1762 }
1763
1764 getrusage(RUSAGE_SELF, &self_ru);
1765 getrusage(RUSAGE_CHILDREN, &c_ru);
1766 getClientsMaxBuffers(&lol,&bib);
1767
1768 /* Server */
1769 if (allsections || defsections || !strcasecmp(section,"server")) {
1770 struct utsname name;
1771
1772 if (sections++) info = sdscat(info,"\r\n");
1773 uname(&name);
1774 info = sdscatprintf(info,
1775 "# Server\r\n"
1776 "redis_version:%s\r\n"
1777 "redis_git_sha1:%s\r\n"
1778 "redis_git_dirty:%d\r\n"
1779 "os:%s %s %s\r\n"
1780 "arch_bits:%d\r\n"
1781 "multiplexing_api:%s\r\n"
1782 "gcc_version:%d.%d.%d\r\n"
1783 "process_id:%ld\r\n"
1784 "run_id:%s\r\n"
1785 "tcp_port:%d\r\n"
1786 "uptime_in_seconds:%ld\r\n"
1787 "uptime_in_days:%ld\r\n"
1788 "lru_clock:%ld\r\n",
1789 REDIS_VERSION,
1790 redisGitSHA1(),
1791 strtol(redisGitDirty(),NULL,10) > 0,
1792 name.sysname, name.release, name.machine,
1793 server.arch_bits,
1794 aeGetApiName(),
1795 #ifdef __GNUC__
1796 __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
1797 #else
1798 0,0,0,
1799 #endif
1800 (long) getpid(),
1801 server.runid,
1802 server.port,
1803 uptime,
1804 uptime/(3600*24),
1805 (unsigned long) server.lruclock);
1806 }
1807
1808 /* Clients */
1809 if (allsections || defsections || !strcasecmp(section,"clients")) {
1810 if (sections++) info = sdscat(info,"\r\n");
1811 info = sdscatprintf(info,
1812 "# Clients\r\n"
1813 "connected_clients:%lu\r\n"
1814 "client_longest_output_list:%lu\r\n"
1815 "client_biggest_input_buf:%lu\r\n"
1816 "blocked_clients:%d\r\n",
1817 listLength(server.clients)-listLength(server.slaves),
1818 lol, bib,
1819 server.bpop_blocked_clients);
1820 }
1821
1822 /* Memory */
1823 if (allsections || defsections || !strcasecmp(section,"memory")) {
1824 char hmem[64];
1825 char peak_hmem[64];
1826
1827 bytesToHuman(hmem,zmalloc_used_memory());
1828 bytesToHuman(peak_hmem,server.stat_peak_memory);
1829 if (sections++) info = sdscat(info,"\r\n");
1830 info = sdscatprintf(info,
1831 "# Memory\r\n"
1832 "used_memory:%zu\r\n"
1833 "used_memory_human:%s\r\n"
1834 "used_memory_rss:%zu\r\n"
1835 "used_memory_peak:%zu\r\n"
1836 "used_memory_peak_human:%s\r\n"
1837 "used_memory_lua:%lld\r\n"
1838 "mem_fragmentation_ratio:%.2f\r\n"
1839 "mem_allocator:%s\r\n",
1840 zmalloc_used_memory(),
1841 hmem,
1842 zmalloc_get_rss(),
1843 server.stat_peak_memory,
1844 peak_hmem,
1845 ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
1846 zmalloc_get_fragmentation_ratio(),
1847 ZMALLOC_LIB
1848 );
1849 }
1850
1851 /* Persistence */
1852 if (allsections || defsections || !strcasecmp(section,"persistence")) {
1853 if (sections++) info = sdscat(info,"\r\n");
1854 info = sdscatprintf(info,
1855 "# Persistence\r\n"
1856 "loading:%d\r\n"
1857 "aof_enabled:%d\r\n"
1858 "changes_since_last_save:%lld\r\n"
1859 "bgsave_in_progress:%d\r\n"
1860 "last_save_time:%ld\r\n"
1861 "last_bgsave_status:%s\r\n"
1862 "bgrewriteaof_in_progress:%d\r\n"
1863 "bgrewriteaof_scheduled:%d\r\n",
1864 server.loading,
1865 server.aof_state != REDIS_AOF_OFF,
1866 server.dirty,
1867 server.rdb_child_pid != -1,
1868 server.lastsave,
1869 server.lastbgsave_status == REDIS_OK ? "ok" : "err",
1870 server.aof_child_pid != -1,
1871 server.aof_rewrite_scheduled);
1872
1873 if (server.aof_state != REDIS_AOF_OFF) {
1874 info = sdscatprintf(info,
1875 "aof_current_size:%lld\r\n"
1876 "aof_base_size:%lld\r\n"
1877 "aof_pending_rewrite:%d\r\n"
1878 "aof_buffer_length:%zu\r\n"
1879 "aof_pending_bio_fsync:%llu\r\n"
1880 "aof_delayed_fsync:%lu\r\n",
1881 (long long) server.aof_current_size,
1882 (long long) server.aof_rewrite_base_size,
1883 server.aof_rewrite_scheduled,
1884 sdslen(server.aof_buf),
1885 bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
1886 server.aof_delayed_fsync);
1887 }
1888
1889 if (server.loading) {
1890 double perc;
1891 time_t eta, elapsed;
1892 off_t remaining_bytes = server.loading_total_bytes-
1893 server.loading_loaded_bytes;
1894
1895 perc = ((double)server.loading_loaded_bytes /
1896 server.loading_total_bytes) * 100;
1897
1898 elapsed = server.unixtime-server.loading_start_time;
1899 if (elapsed == 0) {
1900 eta = 1; /* A fake 1 second figure if we don't have
1901 enough info */
1902 } else {
1903 eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
1904 }
1905
1906 info = sdscatprintf(info,
1907 "loading_start_time:%ld\r\n"
1908 "loading_total_bytes:%llu\r\n"
1909 "loading_loaded_bytes:%llu\r\n"
1910 "loading_loaded_perc:%.2f\r\n"
1911 "loading_eta_seconds:%ld\r\n"
1912 ,(unsigned long) server.loading_start_time,
1913 (unsigned long long) server.loading_total_bytes,
1914 (unsigned long long) server.loading_loaded_bytes,
1915 perc,
1916 eta
1917 );
1918 }
1919 }
1920
1921 /* Stats */
1922 if (allsections || defsections || !strcasecmp(section,"stats")) {
1923 if (sections++) info = sdscat(info,"\r\n");
1924 info = sdscatprintf(info,
1925 "# Stats\r\n"
1926 "total_connections_received:%lld\r\n"
1927 "total_commands_processed:%lld\r\n"
1928 "instantaneous_ops_per_sec:%lld\r\n"
1929 "rejected_connections:%lld\r\n"
1930 "expired_keys:%lld\r\n"
1931 "evicted_keys:%lld\r\n"
1932 "keyspace_hits:%lld\r\n"
1933 "keyspace_misses:%lld\r\n"
1934 "pubsub_channels:%ld\r\n"
1935 "pubsub_patterns:%lu\r\n"
1936 "latest_fork_usec:%lld\r\n",
1937 server.stat_numconnections,
1938 server.stat_numcommands,
1939 getOperationsPerSecond(),
1940 server.stat_rejected_conn,
1941 server.stat_expiredkeys,
1942 server.stat_evictedkeys,
1943 server.stat_keyspace_hits,
1944 server.stat_keyspace_misses,
1945 dictSize(server.pubsub_channels),
1946 listLength(server.pubsub_patterns),
1947 server.stat_fork_time);
1948 }
1949
1950 /* Replication */
1951 if (allsections || defsections || !strcasecmp(section,"replication")) {
1952 if (sections++) info = sdscat(info,"\r\n");
1953 info = sdscatprintf(info,
1954 "# Replication\r\n"
1955 "role:%s\r\n",
1956 server.masterhost == NULL ? "master" : "slave");
1957 if (server.masterhost) {
1958 info = sdscatprintf(info,
1959 "master_host:%s\r\n"
1960 "master_port:%d\r\n"
1961 "master_link_status:%s\r\n"
1962 "master_last_io_seconds_ago:%d\r\n"
1963 "master_sync_in_progress:%d\r\n"
1964 ,server.masterhost,
1965 server.masterport,
1966 (server.repl_state == REDIS_REPL_CONNECTED) ?
1967 "up" : "down",
1968 server.master ?
1969 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
1970 server.repl_state == REDIS_REPL_TRANSFER
1971 );
1972
1973 if (server.repl_state == REDIS_REPL_TRANSFER) {
1974 info = sdscatprintf(info,
1975 "master_sync_left_bytes:%ld\r\n"
1976 "master_sync_last_io_seconds_ago:%d\r\n"
1977 ,(long)server.repl_transfer_left,
1978 (int)(server.unixtime-server.repl_transfer_lastio)
1979 );
1980 }
1981
1982 if (server.repl_state != REDIS_REPL_CONNECTED) {
1983 info = sdscatprintf(info,
1984 "master_link_down_since_seconds:%ld\r\n",
1985 (long)server.unixtime-server.repl_down_since);
1986 }
1987 }
1988 info = sdscatprintf(info,
1989 "connected_slaves:%lu\r\n",
1990 listLength(server.slaves));
1991 if (listLength(server.slaves)) {
1992 int slaveid = 0;
1993 listNode *ln;
1994 listIter li;
1995
1996 listRewind(server.slaves,&li);
1997 while((ln = listNext(&li))) {
1998 redisClient *slave = listNodeValue(ln);
1999 char *state = NULL;
2000 char ip[32];
2001 int port;
2002
2003 if (anetPeerToString(slave->fd,ip,&port) == -1) continue;
2004 switch(slave->replstate) {
2005 case REDIS_REPL_WAIT_BGSAVE_START:
2006 case REDIS_REPL_WAIT_BGSAVE_END:
2007 state = "wait_bgsave";
2008 break;
2009 case REDIS_REPL_SEND_BULK:
2010 state = "send_bulk";
2011 break;
2012 case REDIS_REPL_ONLINE:
2013 state = "online";
2014 break;
2015 }
2016 if (state == NULL) continue;
2017 info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n",
2018 slaveid,ip,port,state);
2019 slaveid++;
2020 }
2021 }
2022 }
2023
2024 /* CPU */
2025 if (allsections || defsections || !strcasecmp(section,"cpu")) {
2026 if (sections++) info = sdscat(info,"\r\n");
2027 info = sdscatprintf(info,
2028 "# CPU\r\n"
2029 "used_cpu_sys:%.2f\r\n"
2030 "used_cpu_user:%.2f\r\n"
2031 "used_cpu_sys_children:%.2f\r\n"
2032 "used_cpu_user_children:%.2f\r\n",
2033 (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
2034 (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
2035 (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
2036 (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
2037 }
2038
2039 /* cmdtime */
2040 if (allsections || !strcasecmp(section,"commandstats")) {
2041 if (sections++) info = sdscat(info,"\r\n");
2042 info = sdscatprintf(info, "# Commandstats\r\n");
2043 numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
2044 for (j = 0; j < numcommands; j++) {
2045 struct redisCommand *c = redisCommandTable+j;
2046
2047 if (!c->calls) continue;
2048 info = sdscatprintf(info,
2049 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
2050 c->name, c->calls, c->microseconds,
2051 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
2052 }
2053 }
2054
2055 /* Key space */
2056 if (allsections || defsections || !strcasecmp(section,"keyspace")) {
2057 if (sections++) info = sdscat(info,"\r\n");
2058 info = sdscatprintf(info, "# Keyspace\r\n");
2059 for (j = 0; j < server.dbnum; j++) {
2060 long long keys, vkeys;
2061
2062 keys = dictSize(server.db[j].dict);
2063 vkeys = dictSize(server.db[j].expires);
2064 if (keys || vkeys) {
2065 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
2066 j, keys, vkeys);
2067 }
2068 }
2069 }
2070 return info;
2071 }
2072
2073 void infoCommand(redisClient *c) {
2074 char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
2075
2076 if (c->argc > 2) {
2077 addReply(c,shared.syntaxerr);
2078 return;
2079 }
2080 sds info = genRedisInfoString(section);
2081 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
2082 (unsigned long)sdslen(info)));
2083 addReplySds(c,info);
2084 addReply(c,shared.crlf);
2085 }
2086
2087 void monitorCommand(redisClient *c) {
2088 /* ignore MONITOR if aleady slave or in monitor mode */
2089 if (c->flags & REDIS_SLAVE) return;
2090
2091 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
2092 c->slaveseldb = 0;
2093 listAddNodeTail(server.monitors,c);
2094 addReply(c,shared.ok);
2095 }
2096
2097 /* ============================ Maxmemory directive ======================== */
2098
2099 /* This function gets called when 'maxmemory' is set on the config file to limit
2100 * the max memory used by the server, before processing a command.
2101 *
2102 * The goal of the function is to free enough memory to keep Redis under the
2103 * configured memory limit.
2104 *
2105 * The function starts calculating how many bytes should be freed to keep
2106 * Redis under the limit, and enters a loop selecting the best keys to
2107 * evict accordingly to the configured policy.
2108 *
2109 * If all the bytes needed to return back under the limit were freed the
2110 * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
2111 * should block the execution of commands that will result in more memory
2112 * used by the server.
2113 */
2114 int freeMemoryIfNeeded(void) {
2115 size_t mem_used, mem_tofree, mem_freed;
2116 int slaves = listLength(server.slaves);
2117
2118 /* Remove the size of slaves output buffers and AOF buffer from the
2119 * count of used memory. */
2120 mem_used = zmalloc_used_memory();
2121 if (slaves) {
2122 listIter li;
2123 listNode *ln;
2124
2125 listRewind(server.slaves,&li);
2126 while((ln = listNext(&li))) {
2127 redisClient *slave = listNodeValue(ln);
2128 unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
2129 if (obuf_bytes > mem_used)
2130 mem_used = 0;
2131 else
2132 mem_used -= obuf_bytes;
2133 }
2134 }
2135 if (server.aof_state != REDIS_AOF_OFF) {
2136 mem_used -= sdslen(server.aof_buf);
2137 mem_used -= sdslen(server.aof_rewrite_buf);
2138 }
2139
2140 /* Check if we are over the memory limit. */
2141 if (mem_used <= server.maxmemory) return REDIS_OK;
2142
2143 if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
2144 return REDIS_ERR; /* We need to free memory, but policy forbids. */
2145
2146 /* Compute how much memory we need to free. */
2147 mem_tofree = mem_used - server.maxmemory;
2148 mem_freed = 0;
2149 while (mem_freed < mem_tofree) {
2150 int j, k, keys_freed = 0;
2151
2152 for (j = 0; j < server.dbnum; j++) {
2153 long bestval = 0; /* just to prevent warning */
2154 sds bestkey = NULL;
2155 struct dictEntry *de;
2156 redisDb *db = server.db+j;
2157 dict *dict;
2158
2159 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2160 server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
2161 {
2162 dict = server.db[j].dict;
2163 } else {
2164 dict = server.db[j].expires;
2165 }
2166 if (dictSize(dict) == 0) continue;
2167
2168 /* volatile-random and allkeys-random policy */
2169 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
2170 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
2171 {
2172 de = dictGetRandomKey(dict);
2173 bestkey = dictGetKey(de);
2174 }
2175
2176 /* volatile-lru and allkeys-lru policy */
2177 else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2178 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2179 {
2180 for (k = 0; k < server.maxmemory_samples; k++) {
2181 sds thiskey;
2182 long thisval;
2183 robj *o;
2184
2185 de = dictGetRandomKey(dict);
2186 thiskey = dictGetKey(de);
2187 /* When policy is volatile-lru we need an additonal lookup
2188 * to locate the real key, as dict is set to db->expires. */
2189 if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2190 de = dictFind(db->dict, thiskey);
2191 o = dictGetVal(de);
2192 thisval = estimateObjectIdleTime(o);
2193
2194 /* Higher idle time is better candidate for deletion */
2195 if (bestkey == NULL || thisval > bestval) {
2196 bestkey = thiskey;
2197 bestval = thisval;
2198 }
2199 }
2200 }
2201
2202 /* volatile-ttl */
2203 else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
2204 for (k = 0; k < server.maxmemory_samples; k++) {
2205 sds thiskey;
2206 long thisval;
2207
2208 de = dictGetRandomKey(dict);
2209 thiskey = dictGetKey(de);
2210 thisval = (long) dictGetVal(de);
2211
2212 /* Expire sooner (minor expire unix timestamp) is better
2213 * candidate for deletion */
2214 if (bestkey == NULL || thisval < bestval) {
2215 bestkey = thiskey;
2216 bestval = thisval;
2217 }
2218 }
2219 }
2220
2221 /* Finally remove the selected key. */
2222 if (bestkey) {
2223 long long delta;
2224
2225 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
2226 propagateExpire(db,keyobj);
2227 /* We compute the amount of memory freed by dbDelete() alone.
2228 * It is possible that actually the memory needed to propagate
2229 * the DEL in AOF and replication link is greater than the one
2230 * we are freeing removing the key, but we can't account for
2231 * that otherwise we would never exit the loop.
2232 *
2233 * AOF and Output buffer memory will be freed eventually so
2234 * we only care about memory used by the key space. */
2235 delta = (long long) zmalloc_used_memory();
2236 dbDelete(db,keyobj);
2237 delta -= (long long) zmalloc_used_memory();
2238 mem_freed += delta;
2239 server.stat_evictedkeys++;
2240 decrRefCount(keyobj);
2241 keys_freed++;
2242
2243 /* When the memory to free starts to be big enough, we may
2244 * start spending so much time here that is impossible to
2245 * deliver data to the slaves fast enough, so we force the
2246 * transmission here inside the loop. */
2247 if (slaves) flushSlavesOutputBuffers();
2248 }
2249 }
2250 if (!keys_freed) return REDIS_ERR; /* nothing to free... */
2251 }
2252 return REDIS_OK;
2253 }
2254
2255 /* =================================== Main! ================================ */
2256
2257 #ifdef __linux__
/* Read /proc/sys/vm/overcommit_memory and return its content converted
 * with atoi(). Returns -1 if the file cannot be opened or no line can be
 * read from it. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    char *got;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;

    /* The stream is closed on both the success and the failure path. */
    got = fgets(line,64,procfile);
    fclose(procfile);

    return (got == NULL) ? -1 : atoi(line);
}
2271
2272 void linuxOvercommitMemoryWarning(void) {
2273 if (linuxOvercommitMemoryValue() == 0) {
2274 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
2275 }
2276 }
2277 #endif /* __linux__ */
2278
2279 void createPidFile(void) {
2280 /* Try to write the pid file in a best-effort way. */
2281 FILE *fp = fopen(server.pidfile,"w");
2282 if (fp) {
2283 fprintf(fp,"%d\n",(int)getpid());
2284 fclose(fp);
2285 }
2286 }
2287
/* Detach the process from the terminal: fork, start a new session in the
 * child and redirect the standard streams to /dev/null. */
void daemonize(void) {
    int devnull;

    /* Only the process that sees fork() return 0 continues. Any non-zero
     * result (the parent, but also a failed fork) exits with status 0. */
    if (fork() != 0) exit(0);
    setsid(); /* create a new session */

    /* Every output goes to /dev/null. If Redis is daemonized but
     * the 'logfile' is set to 'stdout' in the configuration file
     * it will not log at all. */
    devnull = open("/dev/null", O_RDWR, 0);
    if (devnull == -1) return;

    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);
    /* Do not close the descriptor if it is one of the standard ones. */
    if (devnull > STDERR_FILENO) close(devnull);
}
2304
2305 void version() {
2306 printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d\n",
2307 REDIS_VERSION,
2308 redisGitSHA1(),
2309 atoi(redisGitDirty()) > 0,
2310 ZMALLOC_LIB,
2311 sizeof(long) == 4 ? 32 : 64);
2312 exit(0);
2313 }
2314
/* Print the command line help on standard error and exit with status 1. */
void usage() {
    /* The text contains no conversion specifications, so it is written
     * verbatim as one string. */
    fputs(
        "Usage: ./redis-server [/path/to/redis.conf] [options]\n"
        " ./redis-server - (read config from stdin)\n"
        " ./redis-server -v or --version\n"
        " ./redis-server -h or --help\n"
        " ./redis-server --test-memory <megabytes>\n\n"
        "Examples:\n"
        " ./redis-server (run the server with default conf)\n"
        " ./redis-server /etc/redis/6379.conf\n"
        " ./redis-server --port 7777\n"
        " ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n"
        " ./redis-server /etc/myredis.conf --loglevel verbose\n",
        stderr);
    exit(1);
}
2329
2330 void redisAsciiArt(void) {
2331 #include "asciilogo.h"
2332 char *buf = zmalloc(1024*16);
2333
2334 snprintf(buf,1024*16,ascii_logo,
2335 REDIS_VERSION,
2336 redisGitSHA1(),
2337 strtol(redisGitDirty(),NULL,10) > 0,
2338 (sizeof(long) == 8) ? "64" : "32",
2339 "stand alone",
2340 server.port,
2341 (long) getpid()
2342 );
2343 redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
2344 zfree(buf);
2345 }
2346
2347 static void sigtermHandler(int sig) {
2348 REDIS_NOTUSED(sig);
2349
2350 redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown...");
2351 server.shutdown_asap = 1;
2352 }
2353
2354 void setupSignalHandlers(void) {
2355 struct sigaction act;
2356
2357 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
2358 * Otherwise, sa_handler is used. */
2359 sigemptyset(&act.sa_mask);
2360 act.sa_flags = 0;
2361 act.sa_handler = sigtermHandler;
2362 sigaction(SIGTERM, &act, NULL);
2363
2364 #ifdef HAVE_BACKTRACE
2365 sigemptyset(&act.sa_mask);
2366 act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
2367 act.sa_sigaction = sigsegvHandler;
2368 sigaction(SIGSEGV, &act, NULL);
2369 sigaction(SIGBUS, &act, NULL);
2370 sigaction(SIGFPE, &act, NULL);
2371 sigaction(SIGILL, &act, NULL);
2372 #endif
2373 return;
2374 }
2375
2376 void memtest(size_t megabytes, int passes);
2377
2378 int main(int argc, char **argv) {
2379 long long start;
2380 struct timeval tv;
2381
2382 /* We need to initialize our libraries, and the server configuration. */
2383 zmalloc_enable_thread_safeness();
2384 srand(time(NULL)^getpid());
2385 gettimeofday(&tv,NULL);
2386 dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
2387 initServerConfig();
2388
2389 if (argc >= 2) {
2390 int j = 1; /* First option to parse in argv[] */
2391 sds options = sdsempty();
2392 char *configfile = NULL;
2393
2394 /* Handle special options --help and --version */
2395 if (strcmp(argv[1], "-v") == 0 ||
2396 strcmp(argv[1], "--version") == 0) version();
2397 if (strcmp(argv[1], "--help") == 0 ||
2398 strcmp(argv[1], "-h") == 0) usage();
2399 if (strcmp(argv[1], "--test-memory") == 0) {
2400 if (argc == 3) {
2401 memtest(atoi(argv[2]),50);
2402 exit(0);
2403 } else {
2404 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
2405 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
2406 exit(1);
2407 }
2408 }
2409
2410 /* First argument is the config file name? */
2411 if (argv[j][0] != '-' || argv[j][1] != '-')
2412 configfile = argv[j++];
2413 /* All the other options are parsed and conceptually appended to the
2414 * configuration file. For instance --port 6380 will generate the
2415 * string "port 6380\n" to be parsed after the actual file name
2416 * is parsed, if any. */
2417 while(j != argc) {
2418 if (argv[j][0] == '-' && argv[j][1] == '-') {
2419 /* Option name */
2420 if (sdslen(options)) options = sdscat(options,"\n");
2421 options = sdscat(options,argv[j]+2);
2422 options = sdscat(options," ");
2423 } else {
2424 /* Option argument */
2425 options = sdscatrepr(options,argv[j],strlen(argv[j]));
2426 options = sdscat(options," ");
2427 }
2428 j++;
2429 }
2430 resetServerSaveParams();
2431 loadServerConfig(configfile,options);
2432 sdsfree(options);
2433 } else {
2434 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
2435 }
2436 if (server.daemonize) daemonize();
2437 initServer();
2438 if (server.daemonize) createPidFile();
2439 redisAsciiArt();
2440 redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
2441 #ifdef __linux__
2442 linuxOvercommitMemoryWarning();
2443 #endif
2444 start = ustime();
2445 if (server.aof_state == REDIS_AOF_ON) {
2446 if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
2447 redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
2448 } else {
2449 if (rdbLoad(server.rdb_filename) == REDIS_OK) {
2450 redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
2451 (float)(ustime()-start)/1000000);
2452 } else if (errno != ENOENT) {
2453 redisLog(REDIS_WARNING,"Fatal error loading the DB. Exiting.");
2454 exit(1);
2455 }
2456 }
2457 if (server.ipfd > 0)
2458 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
2459 if (server.sofd > 0)
2460 redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
2461 aeSetBeforeSleepProc(server.el,beforeSleep);
2462 aeMain(server.el);
2463 aeDeleteEventLoop(server.el);
2464 return 0;
2465 }
2466
2467 /* The End */