]> git.saurik.com Git - redis.git/blob - src/redis.c
More incremental active expired keys collection process.
[redis.git] / src / redis.c
1 /*
2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "redis.h"
31 #include "slowlog.h"
32 #include "bio.h"
33
34 #include <time.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <ctype.h>
40 #include <stdarg.h>
41 #include <arpa/inet.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <sys/time.h>
45 #include <sys/resource.h>
46 #include <sys/uio.h>
47 #include <limits.h>
48 #include <float.h>
49 #include <math.h>
50 #include <sys/resource.h>
51 #include <sys/utsname.h>
52
53 /* Our shared "common" objects */
54
55 struct sharedObjectsStruct shared;
56
57 /* Global vars that are actually used as constants. The following double
58 * values are used for double on-disk serialization, and are initialized
59 * at runtime to avoid strange compiler optimizations. */
60
61 double R_Zero, R_PosInf, R_NegInf, R_Nan;
62
63 /*================================= Globals ================================= */
64
65 /* Global vars */
66 struct redisServer server; /* server global state */
67 struct redisCommand *commandTable;
68
69 /* Our command table.
70 *
71 * Every entry is composed of the following fields:
72 *
73 * name: a string representing the command name.
74 * function: pointer to the C function implementing the command.
75 * arity: number of arguments, it is possible to use -N to say >= N
76 * sflags: command flags as string. See below for a table of flags.
77 * flags: flags as bitmask. Computed by Redis using the 'sflags' field.
78 * get_keys_proc: an optional function to get key arguments from a command.
79 * This is only used when the following three fields are not
80 * enough to specify what arguments are keys.
81 * first_key_index: first argument that is a key
82 * last_key_index: last argument that is a key
83 * key_step: step to get all the keys from first to last argument. For instance
84 * in MSET the step is two since arguments are key,val,key,val,...
85 * microseconds: microseconds of total execution time for this command.
86 * calls: total number of calls of this command.
87 *
88 * The flags, microseconds and calls fields are computed by Redis and should
89 * always be set to zero.
90 *
91 * Command flags are expressed using strings where every character represents
92 * a flag. Later the populateCommandTable() function will take care of
93 * populating the real 'flags' field using this characters.
94 *
95 * This is the meaning of the flags:
96 *
97 * w: write command (may modify the key space).
98 * r: read command (will never modify the key space).
99 * m: may increase memory usage once called. Don't allow if out of memory.
100 * a: admin command, like SAVE or SHUTDOWN.
101 * p: Pub/Sub related command.
102 * f: force replication of this command, regarless of server.dirty.
103 * s: command not allowed in scripts.
104 * R: random command. Command is not deterministic, that is, the same command
105 * with the same arguments, with the same key space, may have different
106 * results. For instance SPOP and RANDOMKEY are two random commands.
107 * S: Sort command output array if called from script, so that the output
108 * is deterministic.
109 */
110 struct redisCommand redisCommandTable[] = {
111 {"get",getCommand,2,"r",0,NULL,1,1,1,0,0},
112 {"set",setCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
113 {"setnx",setnxCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
114 {"setex",setexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
115 {"psetex",psetexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
116 {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
117 {"strlen",strlenCommand,2,"r",0,NULL,1,1,1,0,0},
118 {"del",delCommand,-2,"w",0,noPreloadGetKeys,1,-1,1,0,0},
119 {"exists",existsCommand,2,"r",0,NULL,1,1,1,0,0},
120 {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
121 {"getbit",getbitCommand,3,"r",0,NULL,1,1,1,0,0},
122 {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
123 {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
124 {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
125 {"incr",incrCommand,2,"wm",0,NULL,1,1,1,0,0},
126 {"decr",decrCommand,2,"wm",0,NULL,1,1,1,0,0},
127 {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
128 {"rpush",rpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
129 {"lpush",lpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
130 {"rpushx",rpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
131 {"lpushx",lpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
132 {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
133 {"rpop",rpopCommand,2,"w",0,NULL,1,1,1,0,0},
134 {"lpop",lpopCommand,2,"w",0,NULL,1,1,1,0,0},
135 {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
136 {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
137 {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
138 {"llen",llenCommand,2,"r",0,NULL,1,1,1,0,0},
139 {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
140 {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
141 {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
142 {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
143 {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
144 {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
145 {"sadd",saddCommand,-3,"wm",0,NULL,1,1,1,0,0},
146 {"srem",sremCommand,-3,"w",0,NULL,1,1,1,0,0},
147 {"smove",smoveCommand,4,"w",0,NULL,1,2,1,0,0},
148 {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
149 {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
150 {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
151 {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
152 {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
153 {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
154 {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
155 {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
156 {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
157 {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
158 {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
159 {"zadd",zaddCommand,-4,"wm",0,NULL,1,1,1,0,0},
160 {"zincrby",zincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
161 {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0},
162 {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
163 {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
164 {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
165 {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
166 {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
167 {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
168 {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
169 {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0},
170 {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
171 {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0},
172 {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0},
173 {"zrank",zrankCommand,3,"r",0,NULL,1,1,1,0,0},
174 {"zrevrank",zrevrankCommand,3,"r",0,NULL,1,1,1,0,0},
175 {"hset",hsetCommand,4,"wm",0,NULL,1,1,1,0,0},
176 {"hsetnx",hsetnxCommand,4,"wm",0,NULL,1,1,1,0,0},
177 {"hget",hgetCommand,3,"r",0,NULL,1,1,1,0,0},
178 {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
179 {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
180 {"hincrby",hincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
181 {"hincrbyfloat",hincrbyfloatCommand,4,"wm",0,NULL,1,1,1,0,0},
182 {"hdel",hdelCommand,-3,"w",0,NULL,1,1,1,0,0},
183 {"hlen",hlenCommand,2,"r",0,NULL,1,1,1,0,0},
184 {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
185 {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
186 {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
187 {"hexists",hexistsCommand,3,"r",0,NULL,1,1,1,0,0},
188 {"incrby",incrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
189 {"decrby",decrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
190 {"incrbyfloat",incrbyfloatCommand,3,"wm",0,NULL,1,1,1,0,0},
191 {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
192 {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
193 {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
194 {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
195 {"select",selectCommand,2,"r",0,NULL,0,0,0,0,0},
196 {"move",moveCommand,3,"w",0,NULL,1,1,1,0,0},
197 {"rename",renameCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
198 {"renamenx",renamenxCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
199 {"expire",expireCommand,3,"w",0,NULL,1,1,1,0,0},
200 {"expireat",expireatCommand,3,"w",0,NULL,1,1,1,0,0},
201 {"pexpire",pexpireCommand,3,"w",0,NULL,1,1,1,0,0},
202 {"pexpireat",pexpireatCommand,3,"w",0,NULL,1,1,1,0,0},
203 {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
204 {"dbsize",dbsizeCommand,1,"r",0,NULL,0,0,0,0,0},
205 {"auth",authCommand,2,"rs",0,NULL,0,0,0,0,0},
206 {"ping",pingCommand,1,"r",0,NULL,0,0,0,0,0},
207 {"echo",echoCommand,2,"r",0,NULL,0,0,0,0,0},
208 {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
209 {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
210 {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
211 {"shutdown",shutdownCommand,-1,"ar",0,NULL,0,0,0,0,0},
212 {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0},
213 {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0},
214 {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0},
215 {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0},
216 {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
217 {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
218 {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
219 {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
220 {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0},
221 {"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0},
222 {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
223 {"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0},
224 {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0},
225 {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0},
226 {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0},
227 {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
228 {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0},
229 {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
230 {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
231 {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
232 {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
233 {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0},
234 {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0},
235 {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0},
236 {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0},
237 {"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0},
238 {"migrate",migrateCommand,6,"aw",0,NULL,0,0,0,0,0},
239 {"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0},
240 {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
241 {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0},
242 {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0},
243 {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
244 {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
245 {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
246 {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
247 {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0}
248 };
249
250 /*============================ Utility functions ============================ */
251
252 /* Low level logging. To use only for very big messages, otherwise
253 * redisLog() is to prefer. */
254 void redisLogRaw(int level, const char *msg) {
255 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
256 const char *c = ".-*#";
257 FILE *fp;
258 char buf[64];
259 int rawmode = (level & REDIS_LOG_RAW);
260
261 level &= 0xff; /* clear flags */
262 if (level < server.verbosity) return;
263
264 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
265 if (!fp) return;
266
267 if (rawmode) {
268 fprintf(fp,"%s",msg);
269 } else {
270 int off;
271 struct timeval tv;
272
273 gettimeofday(&tv,NULL);
274 off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
275 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
276 fprintf(fp,"[%d] %s %c %s\n",(int)getpid(),buf,c[level],msg);
277 }
278 fflush(fp);
279
280 if (server.logfile) fclose(fp);
281
282 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
283 }
284
285 /* Like redisLogRaw() but with printf-alike support. This is the funciton that
286 * is used across the code. The raw version is only used in order to dump
287 * the INFO output on crash. */
288 void redisLog(int level, const char *fmt, ...) {
289 va_list ap;
290 char msg[REDIS_MAX_LOGMSG_LEN];
291
292 if ((level&0xff) < server.verbosity) return;
293
294 va_start(ap, fmt);
295 vsnprintf(msg, sizeof(msg), fmt, ap);
296 va_end(ap);
297
298 redisLogRaw(level,msg);
299 }
300
301 /* Log a fixed message without printf-alike capabilities, in a way that is
302 * safe to call from a signal handler.
303 *
304 * We actually use this only for signals that are not fatal from the point
305 * of view of Redis. Signals that are going to kill the server anyway and
306 * where we need printf-alike features are served by redisLog(). */
307 void redisLogFromHandler(int level, const char *msg) {
308 int fd;
309 char buf[64];
310
311 if ((level&0xff) < server.verbosity ||
312 (server.logfile == NULL && server.daemonize)) return;
313 fd = server.logfile ?
314 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644) :
315 STDOUT_FILENO;
316 if (fd == -1) return;
317 ll2string(buf,sizeof(buf),getpid());
318 if (write(fd,"[",1) == -1) goto err;
319 if (write(fd,buf,strlen(buf)) == -1) goto err;
320 if (write(fd," | signal handler] (",20) == -1) goto err;
321 ll2string(buf,sizeof(buf),time(NULL));
322 if (write(fd,buf,strlen(buf)) == -1) goto err;
323 if (write(fd,") ",2) == -1) goto err;
324 if (write(fd,msg,strlen(msg)) == -1) goto err;
325 if (write(fd,"\n",1) == -1) goto err;
326 err:
327 if (server.logfile) close(fd);
328 }
329
330 /* Redis generally does not try to recover from out of memory conditions
331 * when allocating objects or strings, it is not clear if it will be possible
332 * to report this condition to the client since the networking layer itself
333 * is based on heap allocation for send buffers, so we simply abort.
334 * At least the code will be simpler to read... */
335 void oom(const char *msg) {
336 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
337 sleep(1);
338 abort();
339 }
340
/* Current UNIX time expressed in microseconds, as read with
 * gettimeofday(). The seconds are widened to long long before scaling so
 * the multiplication cannot overflow a narrower time_t. */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
351
/* Current UNIX time expressed in milliseconds: ustime() with the
 * sub-millisecond part truncated away. */
long long mstime(void) {
    long long usec = ustime();

    return usec/1000;
}
356
/* Terminate a child process with exit status 'retcode'.
 *
 * After an RDB dump or AOF rewrite the child leaves through _exit() rather
 * than exit(), because the latter may interact with the same file objects
 * used by the parent process. When built with COVERAGE_TEST defined the
 * normal exit() is used instead, so that the right coverage information is
 * obtained. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
368
369 /*====================== Hash table type implementation ==================== */
370
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and Redis objects as values (objects can hold SDS strings,
 * lists, sets). */
374
/* Dict destructor callback that releases 'val' with zfree().
 * 'privdata' is ignored. */
void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    zfree(val);
}
380
381 void dictListDestructor(void *privdata, void *val)
382 {
383 DICT_NOTUSED(privdata);
384 listRelease((list*)val);
385 }
386
387 int dictSdsKeyCompare(void *privdata, const void *key1,
388 const void *key2)
389 {
390 int l1,l2;
391 DICT_NOTUSED(privdata);
392
393 l1 = sdslen((sds)key1);
394 l2 = sdslen((sds)key2);
395 if (l1 != l2) return 0;
396 return memcmp(key1, key2, l1) == 0;
397 }
398
/* Case insensitive key comparison, used for the command lookup table.
 * Returns 1 if the two keys are equal according to strcasecmp(), 0
 * otherwise; being a C string comparison it stops at the first NUL byte.
 * 'privdata' is ignored. */
int dictSdsKeyCaseCompare(void *privdata, const void *key1,
        const void *key2)
{
    int diff;

    DICT_NOTUSED(privdata);

    diff = strcasecmp(key1, key2);
    return diff == 0;
}
407
/* Dict destructor callback for Redis objects: drops one reference with
 * decrRefCount(). NULL is accepted and ignored, since values of swapped
 * out keys are set to NULL. 'privdata' is ignored. */
void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
415
/* Dict destructor callback for sds strings: releases 'val' with
 * sdsfree(). 'privdata' is ignored. */
void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    sdsfree(val);
}
422
423 int dictObjKeyCompare(void *privdata, const void *key1,
424 const void *key2)
425 {
426 const robj *o1 = key1, *o2 = key2;
427 return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
428 }
429
430 unsigned int dictObjHash(const void *key) {
431 const robj *o = key;
432 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
433 }
434
/* Dict hash callback for sds keys: hashes all the sdslen() bytes of the
 * string. */
unsigned int dictSdsHash(const void *key) {
    return dictGenHashFunction((unsigned char*)key,
                               sdslen((char*)key));
}
438
/* Dict hash callback for sds keys, using dictGenCaseHashFunction() over
 * all the sdslen() bytes of the string. It is paired with
 * dictSdsKeyCaseCompare() in the command table dict type. */
unsigned int dictSdsCaseHash(const void *key) {
    return dictGenCaseHashFunction((unsigned char*)key,
                                   sdslen((char*)key));
}
442
443 int dictEncObjKeyCompare(void *privdata, const void *key1,
444 const void *key2)
445 {
446 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
447 int cmp;
448
449 if (o1->encoding == REDIS_ENCODING_INT &&
450 o2->encoding == REDIS_ENCODING_INT)
451 return o1->ptr == o2->ptr;
452
453 o1 = getDecodedObject(o1);
454 o2 = getDecodedObject(o2);
455 cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
456 decrRefCount(o1);
457 decrRefCount(o2);
458 return cmp;
459 }
460
461 unsigned int dictEncObjHash(const void *key) {
462 robj *o = (robj*) key;
463
464 if (o->encoding == REDIS_ENCODING_RAW) {
465 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
466 } else {
467 if (o->encoding == REDIS_ENCODING_INT) {
468 char buf[32];
469 int len;
470
471 len = ll2string(buf,32,(long)o->ptr);
472 return dictGenHashFunction((unsigned char*)buf, len);
473 } else {
474 unsigned int hash;
475
476 o = getDecodedObject(o);
477 hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
478 decrRefCount(o);
479 return hash;
480 }
481 }
482 }
483
484 /* Sets type hash table */
485 dictType setDictType = {
486 dictEncObjHash, /* hash function */
487 NULL, /* key dup */
488 NULL, /* val dup */
489 dictEncObjKeyCompare, /* key compare */
490 dictRedisObjectDestructor, /* key destructor */
491 NULL /* val destructor */
492 };
493
494 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
495 dictType zsetDictType = {
496 dictEncObjHash, /* hash function */
497 NULL, /* key dup */
498 NULL, /* val dup */
499 dictEncObjKeyCompare, /* key compare */
500 dictRedisObjectDestructor, /* key destructor */
501 NULL /* val destructor */
502 };
503
504 /* Db->dict, keys are sds strings, vals are Redis objects. */
505 dictType dbDictType = {
506 dictSdsHash, /* hash function */
507 NULL, /* key dup */
508 NULL, /* val dup */
509 dictSdsKeyCompare, /* key compare */
510 dictSdsDestructor, /* key destructor */
511 dictRedisObjectDestructor /* val destructor */
512 };
513
514 /* Db->expires */
515 dictType keyptrDictType = {
516 dictSdsHash, /* hash function */
517 NULL, /* key dup */
518 NULL, /* val dup */
519 dictSdsKeyCompare, /* key compare */
520 NULL, /* key destructor */
521 NULL /* val destructor */
522 };
523
524 /* Command table. sds string -> command struct pointer. */
525 dictType commandTableDictType = {
526 dictSdsCaseHash, /* hash function */
527 NULL, /* key dup */
528 NULL, /* val dup */
529 dictSdsKeyCaseCompare, /* key compare */
530 dictSdsDestructor, /* key destructor */
531 NULL /* val destructor */
532 };
533
534 /* Hash type hash table (note that small hashes are represented with zimpaps) */
535 dictType hashDictType = {
536 dictEncObjHash, /* hash function */
537 NULL, /* key dup */
538 NULL, /* val dup */
539 dictEncObjKeyCompare, /* key compare */
540 dictRedisObjectDestructor, /* key destructor */
541 dictRedisObjectDestructor /* val destructor */
542 };
543
544 /* Keylist hash table type has unencoded redis objects as keys and
545 * lists as values. It's used for blocking operations (BLPOP) and to
546 * map swapped keys to a list of clients waiting for this keys to be loaded. */
547 dictType keylistDictType = {
548 dictObjHash, /* hash function */
549 NULL, /* key dup */
550 NULL, /* val dup */
551 dictObjKeyCompare, /* key compare */
552 dictRedisObjectDestructor, /* key destructor */
553 dictListDestructor /* val destructor */
554 };
555
556 /* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to
557 * clusterNode structures. */
558 dictType clusterNodesDictType = {
559 dictSdsHash, /* hash function */
560 NULL, /* key dup */
561 NULL, /* val dup */
562 dictSdsKeyCompare, /* key compare */
563 dictSdsDestructor, /* key destructor */
564 NULL /* val destructor */
565 };
566
567 int htNeedsResize(dict *dict) {
568 long long size, used;
569
570 size = dictSlots(dict);
571 used = dictSize(dict);
572 return (size && used && size > DICT_HT_INITIAL_SIZE &&
573 (used*100/size < REDIS_HT_MINFILL));
574 }
575
576 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
577 * we resize the hash table to save memory */
578 void tryResizeHashTables(void) {
579 int j;
580
581 for (j = 0; j < server.dbnum; j++) {
582 if (htNeedsResize(server.db[j].dict))
583 dictResize(server.db[j].dict);
584 if (htNeedsResize(server.db[j].expires))
585 dictResize(server.db[j].expires);
586 }
587 }
588
589 /* Our hash table implementation performs rehashing incrementally while
590 * we write/read from the hash table. Still if the server is idle, the hash
591 * table will use two tables for a long time. So we try to use 1 millisecond
592 * of CPU time at every serverCron() loop in order to rehash some key. */
593 void incrementallyRehash(void) {
594 int j;
595
596 for (j = 0; j < server.dbnum; j++) {
597 if (dictIsRehashing(server.db[j].dict)) {
598 dictRehashMilliseconds(server.db[j].dict,1);
599 break; /* already used our millisecond for this loop... */
600 }
601 }
602 }
603
604 /* This function is called once a background process of some kind terminates,
605 * as we want to avoid resizing the hash tables when there is a child in order
606 * to play well with copy-on-write (otherwise when a resize happens lots of
607 * memory pages are copied). The goal of this function is to update the ability
608 * for dict.c to resize the hash tables accordingly to the fact we have o not
609 * running childs. */
610 void updateDictResizePolicy(void) {
611 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
612 dictEnableResize();
613 else
614 dictDisableResize();
615 }
616
617 /* ======================= Cron: called every 100 ms ======================== */
618
619 /* Try to expire a few timed out keys. The algorithm used is adaptive and
620 * will use few CPU cycles if there are few expiring keys, otherwise
621 * it will get more aggressive to avoid that too much memory is used by
622 * keys that can be removed from the keyspace. */
623 void activeExpireCycle(void) {
624 int j;
625 long long start = mstime();
626
627 for (j = 0; j < server.dbnum; j++) {
628 int expired, iteration = 0;
629 redisDb *db = server.db+j;
630
631 /* Continue to expire if at the end of the cycle more than 25%
632 * of the keys were expired. */
633 do {
634 long num = dictSize(db->expires);
635 long long now = mstime();
636
637 expired = 0;
638 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
639 num = REDIS_EXPIRELOOKUPS_PER_CRON;
640 while (num--) {
641 dictEntry *de;
642 long long t;
643
644 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
645 t = dictGetSignedIntegerVal(de);
646 if (now > t) {
647 sds key = dictGetKey(de);
648 robj *keyobj = createStringObject(key,sdslen(key));
649
650 propagateExpire(db,keyobj);
651 dbDelete(db,keyobj);
652 decrRefCount(keyobj);
653 expired++;
654 server.stat_expiredkeys++;
655 }
656 }
657 /* We can't block forever here even if there are many keys to
658 * expire. So after a given amount of milliseconds return to the
659 * caller waiting for the other active expire cycle. */
660 iteration++;
661 if ((iteration & 0xff) == 0 && /* & 0xff is the same as % 255 */
662 (mstime()-start) > REDIS_EXPIRELOOKUPS_TIME_LIMIT) return;
663 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
664 }
665 }
666
667 void updateLRUClock(void) {
668 server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) &
669 REDIS_LRU_CLOCK_MAX;
670 }
671
672
673 /* Add a sample to the operations per second array of samples. */
674 void trackOperationsPerSecond(void) {
675 long long t = mstime() - server.ops_sec_last_sample_time;
676 long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
677 long long ops_sec;
678
679 ops_sec = t > 0 ? (ops*1000/t) : 0;
680
681 server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
682 server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
683 server.ops_sec_last_sample_time = mstime();
684 server.ops_sec_last_sample_ops = server.stat_numcommands;
685 }
686
687 /* Return the mean of all the samples. */
688 long long getOperationsPerSecond(void) {
689 int j;
690 long long sum = 0;
691
692 for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
693 sum += server.ops_sec_samples[j];
694 return sum / REDIS_OPS_SEC_SAMPLES;
695 }
696
697 /* Check for timeouts. Returns non-zero if the client was terminated */
698 int clientsCronHandleTimeout(redisClient *c) {
699 time_t now = server.unixtime;
700
701 if (server.maxidletime &&
702 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
703 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
704 !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
705 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
706 listLength(c->pubsub_patterns) == 0 &&
707 (now - c->lastinteraction > server.maxidletime))
708 {
709 redisLog(REDIS_VERBOSE,"Closing idle client");
710 freeClient(c);
711 return 1;
712 } else if (c->flags & REDIS_BLOCKED) {
713 if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
714 addReply(c,shared.nullmultibulk);
715 unblockClientWaitingData(c);
716 }
717 }
718 return 0;
719 }
720
721 /* The client query buffer is an sds.c string that can end with a lot of
722 * free space not used, this function reclaims space if needed.
723 *
724 * The funciton always returns 0 as it never terminates the client. */
725 int clientsCronResizeQueryBuffer(redisClient *c) {
726 size_t querybuf_size = sdsAllocSize(c->querybuf);
727 time_t idletime = server.unixtime - c->lastinteraction;
728
729 /* There are two conditions to resize the query buffer:
730 * 1) Query buffer is > BIG_ARG and too big for latest peak.
731 * 2) Client is inactive and the buffer is bigger than 1k. */
732 if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
733 (querybuf_size/(c->querybuf_peak+1)) > 2) ||
734 (querybuf_size > 1024 && idletime > 2))
735 {
736 /* Only resize the query buffer if it is actually wasting space. */
737 if (sdsavail(c->querybuf) > 1024) {
738 c->querybuf = sdsRemoveFreeSpace(c->querybuf);
739 }
740 }
741 /* Reset the peak again to capture the peak memory usage in the next
742 * cycle. */
743 c->querybuf_peak = 0;
744 return 0;
745 }
746
747 void clientsCron(void) {
748 /* Make sure to process at least 1/100 of clients per call.
749 * Since this function is called 10 times per second we are sure that
750 * in the worst case we process all the clients in 10 seconds.
751 * In normal conditions (a reasonable number of clients) we process
752 * all the clients in a shorter time. */
753 int numclients = listLength(server.clients);
754 int iterations = numclients/100;
755
756 if (iterations < 50)
757 iterations = (numclients < 50) ? numclients : 50;
758 while(listLength(server.clients) && iterations--) {
759 redisClient *c;
760 listNode *head;
761
762 /* Rotate the list, take the current head, process.
763 * This way if the client must be removed from the list it's the
764 * first element and we don't incur into O(N) computation. */
765 listRotate(server.clients);
766 head = listFirst(server.clients);
767 c = listNodeValue(head);
768 /* The following functions do different service checks on the client.
769 * The protocol is that they return non-zero if the client was
770 * terminated. */
771 if (clientsCronHandleTimeout(c)) continue;
772 if (clientsCronResizeQueryBuffer(c)) continue;
773 }
774 }
775
776 int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
777 int j, loops = server.cronloops;
778 REDIS_NOTUSED(eventLoop);
779 REDIS_NOTUSED(id);
780 REDIS_NOTUSED(clientData);
781
782 /* Software watchdog: deliver the SIGALRM that will reach the signal
783 * handler if we don't return here fast enough. */
784 if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
785
786 /* We take a cached value of the unix time in the global state because
787 * with virtual memory and aging there is to store the current time
788 * in objects at every object access, and accuracy is not needed.
789 * To access a global var is faster than calling time(NULL) */
790 server.unixtime = time(NULL);
791
792 trackOperationsPerSecond();
793
794 /* We have just 22 bits per object for LRU information.
795 * So we use an (eventually wrapping) LRU clock with 10 seconds resolution.
796 * 2^22 bits with 10 seconds resoluton is more or less 1.5 years.
797 *
798 * Note that even if this will wrap after 1.5 years it's not a problem,
799 * everything will still work but just some object will appear younger
800 * to Redis. But for this to happen a given object should never be touched
801 * for 1.5 years.
802 *
803 * Note that you can change the resolution altering the
804 * REDIS_LRU_CLOCK_RESOLUTION define.
805 */
806 updateLRUClock();
807
808 /* Record the max memory used since the server was started. */
809 if (zmalloc_used_memory() > server.stat_peak_memory)
810 server.stat_peak_memory = zmalloc_used_memory();
811
812 /* We received a SIGTERM, shutting down here in a safe way, as it is
813 * not ok doing so inside the signal handler. */
814 if (server.shutdown_asap) {
815 if (prepareForShutdown(0) == REDIS_OK) exit(0);
816 redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
817 }
818
819 /* Show some info about non-empty databases */
820 for (j = 0; j < server.dbnum; j++) {
821 long long size, used, vkeys;
822
823 size = dictSlots(server.db[j].dict);
824 used = dictSize(server.db[j].dict);
825 vkeys = dictSize(server.db[j].expires);
826 if (!(loops % 50) && (used || vkeys)) {
827 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
828 /* dictPrintStats(server.dict); */
829 }
830 }
831
832 /* We don't want to resize the hash tables while a background saving
833 * is in progress: the saving child is created using fork() that is
834 * implemented with a copy-on-write semantic in most modern systems, so
835 * if we resize the HT while there is the saving child at work actually
836 * a lot of memory movements in the parent will cause a lot of pages
837 * copied. */
838 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
839 if (!(loops % 10)) tryResizeHashTables();
840 if (server.activerehashing) incrementallyRehash();
841 }
842
843 /* Show information about connected clients */
844 if (!(loops % 50)) {
845 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
846 listLength(server.clients)-listLength(server.slaves),
847 listLength(server.slaves),
848 zmalloc_used_memory());
849 }
850
851 /* We need to do a few operations on clients asynchronously. */
852 clientsCron();
853
854 /* Start a scheduled AOF rewrite if this was requested by the user while
855 * a BGSAVE was in progress. */
856 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
857 server.aof_rewrite_scheduled)
858 {
859 rewriteAppendOnlyFileBackground();
860 }
861
862 /* Check if a background saving or AOF rewrite in progress terminated. */
863 if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
864 int statloc;
865 pid_t pid;
866
867 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
868 int exitcode = WEXITSTATUS(statloc);
869 int bysignal = 0;
870
871 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
872
873 if (pid == server.rdb_child_pid) {
874 backgroundSaveDoneHandler(exitcode,bysignal);
875 } else {
876 backgroundRewriteDoneHandler(exitcode,bysignal);
877 }
878 updateDictResizePolicy();
879 }
880 } else {
881 /* If there is not a background saving/rewrite in progress check if
882 * we have to save/rewrite now */
883 for (j = 0; j < server.saveparamslen; j++) {
884 struct saveparam *sp = server.saveparams+j;
885
886 if (server.dirty >= sp->changes &&
887 server.unixtime-server.lastsave > sp->seconds) {
888 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
889 sp->changes, sp->seconds);
890 rdbSaveBackground(server.rdb_filename);
891 break;
892 }
893 }
894
895 /* Trigger an AOF rewrite if needed */
896 if (server.rdb_child_pid == -1 &&
897 server.aof_child_pid == -1 &&
898 server.aof_rewrite_perc &&
899 server.aof_current_size > server.aof_rewrite_min_size)
900 {
901 long long base = server.aof_rewrite_base_size ?
902 server.aof_rewrite_base_size : 1;
903 long long growth = (server.aof_current_size*100/base) - 100;
904 if (growth >= server.aof_rewrite_perc) {
905 redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
906 rewriteAppendOnlyFileBackground();
907 }
908 }
909 }
910
911
912 /* If we postponed an AOF buffer flush, let's try to do it every time the
913 * cron function is called. */
914 if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
915
916 /* Expire a few keys per cycle, only if this is a master.
917 * On slaves we wait for DEL operations synthesized by the master
918 * in order to guarantee a strict consistency. */
919 if (server.masterhost == NULL) activeExpireCycle();
920
921 /* Close clients that need to be closed asynchronous */
922 freeClientsInAsyncFreeQueue();
923
924 /* Replication cron function -- used to reconnect to master and
925 * to detect transfer failures. */
926 if (!(loops % 10)) replicationCron();
927
928 /* Run other sub-systems specific cron jobs */
929 if (server.cluster_enabled && !(loops % 10)) clusterCron();
930
931 server.cronloops++;
932 return 100;
933 }
934
935 /* This function gets called every time Redis is entering the
936 * main loop of the event driven library, that is, before to sleep
937 * for ready file descriptors. */
938 void beforeSleep(struct aeEventLoop *eventLoop) {
939 REDIS_NOTUSED(eventLoop);
940 listNode *ln;
941 redisClient *c;
942
943 /* Try to process pending commands for clients that were just unblocked. */
944 while (listLength(server.unblocked_clients)) {
945 ln = listFirst(server.unblocked_clients);
946 redisAssert(ln != NULL);
947 c = ln->value;
948 listDelNode(server.unblocked_clients,ln);
949 c->flags &= ~REDIS_UNBLOCKED;
950
951 /* Process remaining data in the input buffer. */
952 if (c->querybuf && sdslen(c->querybuf) > 0) {
953 server.current_client = c;
954 processInputBuffer(c);
955 server.current_client = NULL;
956 }
957 }
958
959 /* Write the AOF buffer on disk */
960 flushAppendOnlyFile(0);
961 }
962
963 /* =========================== Server initialization ======================== */
964
965 void createSharedObjects(void) {
966 int j;
967
968 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
969 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
970 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
971 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
972 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
973 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
974 shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
975 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
976 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
977 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
978 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
979 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
980 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
981 "-ERR Operation against a key holding the wrong kind of value\r\n"));
982 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
983 "-ERR no such key\r\n"));
984 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
985 "-ERR syntax error\r\n"));
986 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
987 "-ERR source and destination objects are the same\r\n"));
988 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
989 "-ERR index out of range\r\n"));
990 shared.noscripterr = createObject(REDIS_STRING,sdsnew(
991 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
992 shared.loadingerr = createObject(REDIS_STRING,sdsnew(
993 "-LOADING Redis is loading the dataset in memory\r\n"));
994 shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
995 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
996 shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
997 "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
998 shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
999 "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
1000 shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
1001 "-READONLY You can't write against a read only slave.\r\n"));
1002 shared.oomerr = createObject(REDIS_STRING,sdsnew(
1003 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
1004 shared.space = createObject(REDIS_STRING,sdsnew(" "));
1005 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1006 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1007
1008 for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
1009 shared.select[j] = createObject(REDIS_STRING,
1010 sdscatprintf(sdsempty(),"select %d\r\n", j));
1011 }
1012 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1013 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1014 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1015 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1016 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1017 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1018 shared.del = createStringObject("DEL",3);
1019 shared.rpop = createStringObject("RPOP",4);
1020 shared.lpop = createStringObject("LPOP",4);
1021 for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
1022 shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
1023 shared.integers[j]->encoding = REDIS_ENCODING_INT;
1024 }
1025 for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
1026 shared.mbulkhdr[j] = createObject(REDIS_STRING,
1027 sdscatprintf(sdsempty(),"*%d\r\n",j));
1028 shared.bulkhdr[j] = createObject(REDIS_STRING,
1029 sdscatprintf(sdsempty(),"$%d\r\n",j));
1030 }
1031 }
1032
1033 void initServerConfig() {
1034 getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
1035 server.runid[REDIS_RUN_ID_SIZE] = '\0';
1036 server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
1037 server.port = REDIS_SERVERPORT;
1038 server.bindaddr = NULL;
1039 server.unixsocket = NULL;
1040 server.unixsocketperm = 0;
1041 server.ipfd = -1;
1042 server.sofd = -1;
1043 server.dbnum = REDIS_DEFAULT_DBNUM;
1044 server.verbosity = REDIS_NOTICE;
1045 server.maxidletime = REDIS_MAXIDLETIME;
1046 server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
1047 server.saveparams = NULL;
1048 server.loading = 0;
1049 server.logfile = NULL; /* NULL = log on standard output */
1050 server.syslog_enabled = 0;
1051 server.syslog_ident = zstrdup("redis");
1052 server.syslog_facility = LOG_LOCAL0;
1053 server.daemonize = 0;
1054 server.aof_state = REDIS_AOF_OFF;
1055 server.aof_fsync = AOF_FSYNC_EVERYSEC;
1056 server.aof_no_fsync_on_rewrite = 0;
1057 server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
1058 server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
1059 server.aof_rewrite_base_size = 0;
1060 server.aof_rewrite_scheduled = 0;
1061 server.aof_last_fsync = time(NULL);
1062 server.aof_delayed_fsync = 0;
1063 server.aof_fd = -1;
1064 server.aof_selected_db = -1; /* Make sure the first time will not match */
1065 server.aof_flush_postponed_start = 0;
1066 server.pidfile = zstrdup("/var/run/redis.pid");
1067 server.rdb_filename = zstrdup("dump.rdb");
1068 server.aof_filename = zstrdup("appendonly.aof");
1069 server.requirepass = NULL;
1070 server.rdb_compression = 1;
1071 server.rdb_checksum = 1;
1072 server.activerehashing = 1;
1073 server.maxclients = REDIS_MAX_CLIENTS;
1074 server.bpop_blocked_clients = 0;
1075 server.maxmemory = 0;
1076 server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
1077 server.maxmemory_samples = 3;
1078 server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
1079 server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
1080 server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
1081 server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
1082 server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
1083 server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
1084 server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
1085 server.shutdown_asap = 0;
1086 server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
1087 server.repl_timeout = REDIS_REPL_TIMEOUT;
1088 server.cluster_enabled = 0;
1089 server.cluster.configfile = zstrdup("nodes.conf");
1090 server.lua_caller = NULL;
1091 server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
1092 server.lua_client = NULL;
1093 server.lua_timedout = 0;
1094
1095 updateLRUClock();
1096 resetServerSaveParams();
1097
1098 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1099 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1100 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1101 /* Replication related */
1102 server.masterauth = NULL;
1103 server.masterhost = NULL;
1104 server.masterport = 6379;
1105 server.master = NULL;
1106 server.repl_state = REDIS_REPL_NONE;
1107 server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
1108 server.repl_serve_stale_data = 1;
1109 server.repl_slave_ro = 1;
1110 server.repl_down_since = time(NULL);
1111
1112 /* Client output buffer limits */
1113 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
1114 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_bytes = 0;
1115 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_seconds = 0;
1116 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].hard_limit_bytes = 1024*1024*256;
1117 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_bytes = 1024*1024*64;
1118 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_seconds = 60;
1119 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].hard_limit_bytes = 1024*1024*32;
1120 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_bytes = 1024*1024*8;
1121 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_seconds = 60;
1122
1123 /* Double constants initialization */
1124 R_Zero = 0.0;
1125 R_PosInf = 1.0/R_Zero;
1126 R_NegInf = -1.0/R_Zero;
1127 R_Nan = R_Zero/R_Zero;
1128
1129 /* Command table -- we intiialize it here as it is part of the
1130 * initial configuration, since command names may be changed via
1131 * redis.conf using the rename-command directive. */
1132 server.commands = dictCreate(&commandTableDictType,NULL);
1133 populateCommandTable();
1134 server.delCommand = lookupCommandByCString("del");
1135 server.multiCommand = lookupCommandByCString("multi");
1136 server.lpushCommand = lookupCommandByCString("lpush");
1137
1138 /* Slow log */
1139 server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
1140 server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
1141
1142 /* Debugging */
1143 server.assert_failed = "<no assertion failed>";
1144 server.assert_file = "<no file>";
1145 server.assert_line = 0;
1146 server.bug_report_start = 0;
1147 server.watchdog_period = 0;
1148 }
1149
1150 /* This function will try to raise the max number of open files accordingly to
1151 * the configured max number of clients. It will also account for 32 additional
1152 * file descriptors as we need a few more for persistence, listening
1153 * sockets, log files and so forth.
1154 *
1155 * If it will not be possible to set the limit accordingly to the configured
1156 * max number of clients, the function will do the reverse setting
1157 * server.maxclients to the value that we can actually handle. */
1158 void adjustOpenFilesLimit(void) {
1159 rlim_t maxfiles = server.maxclients+32;
1160 struct rlimit limit;
1161
1162 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
1163 redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
1164 strerror(errno));
1165 server.maxclients = 1024-32;
1166 } else {
1167 rlim_t oldlimit = limit.rlim_cur;
1168
1169 /* Set the max number of files if the current limit is not enough
1170 * for our needs. */
1171 if (oldlimit < maxfiles) {
1172 rlim_t f;
1173
1174 f = maxfiles;
1175 while(f > oldlimit) {
1176 limit.rlim_cur = f;
1177 limit.rlim_max = f;
1178 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
1179 f -= 128;
1180 }
1181 if (f < oldlimit) f = oldlimit;
1182 if (f != maxfiles) {
1183 server.maxclients = f-32;
1184 redisLog(REDIS_WARNING,"Unable to set the max number of files limit to %d (%s), setting the max clients configuration to %d.",
1185 (int) maxfiles, strerror(errno), (int) server.maxclients);
1186 } else {
1187 redisLog(REDIS_NOTICE,"Max number of open files set to %d",
1188 (int) maxfiles);
1189 }
1190 }
1191 }
1192 }
1193
1194 void initServer() {
1195 int j;
1196
1197 signal(SIGHUP, SIG_IGN);
1198 signal(SIGPIPE, SIG_IGN);
1199 setupSignalHandlers();
1200
1201 if (server.syslog_enabled) {
1202 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
1203 server.syslog_facility);
1204 }
1205
1206 server.current_client = NULL;
1207 server.clients = listCreate();
1208 server.clients_to_close = listCreate();
1209 server.slaves = listCreate();
1210 server.monitors = listCreate();
1211 server.unblocked_clients = listCreate();
1212
1213 createSharedObjects();
1214 adjustOpenFilesLimit();
1215 server.el = aeCreateEventLoop(server.maxclients+1024);
1216 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1217
1218 if (server.port != 0) {
1219 server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr);
1220 if (server.ipfd == ANET_ERR) {
1221 redisLog(REDIS_WARNING, "Opening port %d: %s",
1222 server.port, server.neterr);
1223 exit(1);
1224 }
1225 }
1226 if (server.unixsocket != NULL) {
1227 unlink(server.unixsocket); /* don't care if this fails */
1228 server.sofd = anetUnixServer(server.neterr,server.unixsocket,server.unixsocketperm);
1229 if (server.sofd == ANET_ERR) {
1230 redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
1231 exit(1);
1232 }
1233 }
1234 if (server.ipfd < 0 && server.sofd < 0) {
1235 redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
1236 exit(1);
1237 }
1238 for (j = 0; j < server.dbnum; j++) {
1239 server.db[j].dict = dictCreate(&dbDictType,NULL);
1240 server.db[j].expires = dictCreate(&keyptrDictType,NULL);
1241 server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1242 server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
1243 server.db[j].id = j;
1244 }
1245 server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1246 server.pubsub_patterns = listCreate();
1247 listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1248 listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
1249 server.cronloops = 0;
1250 server.rdb_child_pid = -1;
1251 server.aof_child_pid = -1;
1252 server.aof_rewrite_buf = sdsempty();
1253 server.aof_buf = sdsempty();
1254 server.lastsave = time(NULL);
1255 server.dirty = 0;
1256 server.stat_numcommands = 0;
1257 server.stat_numconnections = 0;
1258 server.stat_expiredkeys = 0;
1259 server.stat_evictedkeys = 0;
1260 server.stat_starttime = time(NULL);
1261 server.stat_keyspace_misses = 0;
1262 server.stat_keyspace_hits = 0;
1263 server.stat_peak_memory = 0;
1264 server.stat_fork_time = 0;
1265 server.stat_rejected_conn = 0;
1266 memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
1267 server.ops_sec_idx = 0;
1268 server.ops_sec_last_sample_time = mstime();
1269 server.ops_sec_last_sample_ops = 0;
1270 server.unixtime = time(NULL);
1271 server.lastbgsave_status = REDIS_OK;
1272 server.stop_writes_on_bgsave_err = 1;
1273 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1274 if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE,
1275 acceptTcpHandler,NULL) == AE_ERR) oom("creating file event");
1276 if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
1277 acceptUnixHandler,NULL) == AE_ERR) oom("creating file event");
1278
1279 if (server.aof_state == REDIS_AOF_ON) {
1280 server.aof_fd = open(server.aof_filename,
1281 O_WRONLY|O_APPEND|O_CREAT,0644);
1282 if (server.aof_fd == -1) {
1283 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1284 strerror(errno));
1285 exit(1);
1286 }
1287 }
1288
1289 /* 32 bit instances are limited to 4GB of address space, so if there is
1290 * no explicit limit in the user provided configuration we set a limit
1291 * at 3.5GB using maxmemory with 'noeviction' policy'. This saves
1292 * useless crashes of the Redis instance. */
1293 if (server.arch_bits == 32 && server.maxmemory == 0) {
1294 redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3.5 GB maxmemory limit with 'noeviction' policy now.");
1295 server.maxmemory = 3584LL*(1024*1024); /* 3584 MB = 3.5 GB */
1296 server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
1297 }
1298
1299 if (server.cluster_enabled) clusterInit();
1300 scriptingInit();
1301 slowlogInit();
1302 bioInit();
1303 }
1304
1305 /* Populates the Redis Command Table starting from the hard coded list
1306 * we have on top of redis.c file. */
1307 void populateCommandTable(void) {
1308 int j;
1309 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1310
1311 for (j = 0; j < numcommands; j++) {
1312 struct redisCommand *c = redisCommandTable+j;
1313 char *f = c->sflags;
1314 int retval;
1315
1316 while(*f != '\0') {
1317 switch(*f) {
1318 case 'w': c->flags |= REDIS_CMD_WRITE; break;
1319 case 'r': c->flags |= REDIS_CMD_READONLY; break;
1320 case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
1321 case 'a': c->flags |= REDIS_CMD_ADMIN; break;
1322 case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
1323 case 'f': c->flags |= REDIS_CMD_FORCE_REPLICATION; break;
1324 case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
1325 case 'R': c->flags |= REDIS_CMD_RANDOM; break;
1326 case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
1327 default: redisPanic("Unsupported command flag"); break;
1328 }
1329 f++;
1330 }
1331
1332 retval = dictAdd(server.commands, sdsnew(c->name), c);
1333 assert(retval == DICT_OK);
1334 }
1335 }
1336
1337 void resetCommandTableStats(void) {
1338 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1339 int j;
1340
1341 for (j = 0; j < numcommands; j++) {
1342 struct redisCommand *c = redisCommandTable+j;
1343
1344 c->microseconds = 0;
1345 c->calls = 0;
1346 }
1347 }
1348
1349 /* ========================== Redis OP Array API ============================ */
1350
1351 void redisOpArrayInit(redisOpArray *oa) {
1352 oa->ops = NULL;
1353 oa->numops = 0;
1354 }
1355
1356 int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid,
1357 robj **argv, int argc, int target)
1358 {
1359 redisOp *op;
1360
1361 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1));
1362 op = oa->ops+oa->numops;
1363 op->cmd = cmd;
1364 op->dbid = dbid;
1365 op->argv = argv;
1366 op->argc = argc;
1367 op->target = target;
1368 oa->numops++;
1369 return oa->numops;
1370 }
1371
1372 void redisOpArrayFree(redisOpArray *oa) {
1373 while(oa->numops) {
1374 int j;
1375 redisOp *op;
1376
1377 oa->numops--;
1378 op = oa->ops+oa->numops;
1379 for (j = 0; j < op->argc; j++)
1380 decrRefCount(op->argv[j]);
1381 zfree(op->argv);
1382 }
1383 zfree(oa->ops);
1384 }
1385
1386 /* ====================== Commands lookup and execution ===================== */
1387
1388 struct redisCommand *lookupCommand(sds name) {
1389 return dictFetchValue(server.commands, name);
1390 }
1391
1392 struct redisCommand *lookupCommandByCString(char *s) {
1393 struct redisCommand *cmd;
1394 sds name = sdsnew(s);
1395
1396 cmd = dictFetchValue(server.commands, name);
1397 sdsfree(name);
1398 return cmd;
1399 }
1400
1401 /* Propagate the specified command (in the context of the specified database id)
1402 * to AOF, Slaves and Monitors.
1403 *
1404 * flags are an xor between:
1405 * + REDIS_PROPAGATE_NONE (no propagation of command at all)
1406 * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
1407 * + REDIS_PROPAGATE_REPL (propagate into the replication link)
1408 */
1409 void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1410 int flags)
1411 {
1412 if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
1413 feedAppendOnlyFile(cmd,dbid,argv,argc);
1414 if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves))
1415 replicationFeedSlaves(server.slaves,dbid,argv,argc);
1416 }
1417
1418 /* Used inside commands to schedule the propagation of additional commands
1419 * after the current command is propagated to AOF / Replication. */
1420 void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1421 int target)
1422 {
1423 redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
1424 }
1425
1426 /* Call() is the core of Redis execution of a command */
1427 void call(redisClient *c, int flags) {
1428 long long dirty, start = ustime(), duration;
1429
1430 /* Sent the command to clients in MONITOR mode, only if the commands are
1431 * not geneated from reading an AOF. */
1432 if (listLength(server.monitors) && !server.loading)
1433 replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
1434
1435 /* Call the command. */
1436 redisOpArrayInit(&server.also_propagate);
1437 dirty = server.dirty;
1438 c->cmd->proc(c);
1439 dirty = server.dirty-dirty;
1440 duration = ustime()-start;
1441
1442 /* When EVAL is called loading the AOF we don't want commands called
1443 * from Lua to go into the slowlog or to populate statistics. */
1444 if (server.loading && c->flags & REDIS_LUA_CLIENT)
1445 flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
1446
1447 /* Log the command into the Slow log if needed, and populate the
1448 * per-command statistics that we show in INFO commandstats. */
1449 if (flags & REDIS_CALL_SLOWLOG)
1450 slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
1451 if (flags & REDIS_CALL_STATS) {
1452 c->cmd->microseconds += duration;
1453 c->cmd->calls++;
1454 }
1455
1456 /* Propagate the command into the AOF and replication link */
1457 if (flags & REDIS_CALL_PROPAGATE) {
1458 int flags = REDIS_PROPAGATE_NONE;
1459
1460 if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION)
1461 flags |= REDIS_PROPAGATE_REPL;
1462 if (dirty)
1463 flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
1464 if (flags != REDIS_PROPAGATE_NONE)
1465 propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
1466 }
1467 /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional
1468 * PUSH command. */
1469 if (server.also_propagate.numops) {
1470 int j;
1471 redisOp *rop;
1472
1473 for (j = 0; j < server.also_propagate.numops; j++) {
1474 rop = &server.also_propagate.ops[j];
1475 propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
1476 }
1477 redisOpArrayFree(&server.also_propagate);
1478 }
1479 server.stat_numcommands++;
1480 }
1481
1482 /* If this function gets called we already read a whole
1483 * command, argments are in the client argv/argc fields.
1484 * processCommand() execute the command or prepare the
1485 * server for a bulk read from the client.
1486 *
1487 * If 1 is returned the client is still alive and valid and
1488 * and other operations can be performed by the caller. Otherwise
1489 * if 0 is returned the client was destroied (i.e. after QUIT). */
1490 int processCommand(redisClient *c) {
1491 /* The QUIT command is handled separately. Normal command procs will
1492 * go through checking for replication and QUIT will cause trouble
1493 * when FORCE_REPLICATION is enabled and would be implemented in
1494 * a regular command proc. */
1495 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1496 addReply(c,shared.ok);
1497 c->flags |= REDIS_CLOSE_AFTER_REPLY;
1498 return REDIS_ERR;
1499 }
1500
1501 /* Now lookup the command and check ASAP about trivial error conditions
1502 * such as wrong arity, bad command name and so forth. */
1503 c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
1504 if (!c->cmd) {
1505 addReplyErrorFormat(c,"unknown command '%s'",
1506 (char*)c->argv[0]->ptr);
1507 return REDIS_OK;
1508 } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
1509 (c->argc < -c->cmd->arity)) {
1510 addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
1511 c->cmd->name);
1512 return REDIS_OK;
1513 }
1514
1515 /* Check if the user is authenticated */
1516 if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
1517 {
1518 addReplyError(c,"operation not permitted");
1519 return REDIS_OK;
1520 }
1521
1522 /* If cluster is enabled, redirect here */
1523 if (server.cluster_enabled &&
1524 !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0)) {
1525 int hashslot;
1526
1527 if (server.cluster.state != REDIS_CLUSTER_OK) {
1528 addReplyError(c,"The cluster is down. Check with CLUSTER INFO for more information");
1529 return REDIS_OK;
1530 } else {
1531 int ask;
1532 clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&ask);
1533 if (n == NULL) {
1534 addReplyError(c,"Multi keys request invalid in cluster");
1535 return REDIS_OK;
1536 } else if (n != server.cluster.myself) {
1537 addReplySds(c,sdscatprintf(sdsempty(),
1538 "-%s %d %s:%d\r\n", ask ? "ASK" : "MOVED",
1539 hashslot,n->ip,n->port));
1540 return REDIS_OK;
1541 }
1542 }
1543 }
1544
1545 /* Handle the maxmemory directive.
1546 *
1547 * First we try to free some memory if possible (if there are volatile
1548 * keys in the dataset). If there are not the only thing we can do
1549 * is returning an error. */
1550 if (server.maxmemory) {
1551 int retval = freeMemoryIfNeeded();
1552 if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
1553 addReply(c, shared.oomerr);
1554 return REDIS_OK;
1555 }
1556 }
1557
1558 /* Don't accept write commands if there are problems persisting on disk. */
1559 if (server.stop_writes_on_bgsave_err &&
1560 server.saveparamslen > 0
1561 && server.lastbgsave_status == REDIS_ERR &&
1562 c->cmd->flags & REDIS_CMD_WRITE)
1563 {
1564 addReply(c, shared.bgsaveerr);
1565 return REDIS_OK;
1566 }
1567
1568 /* Don't accept wirte commands if this is a read only slave. But
1569 * accept write commands if this is our master. */
1570 if (server.masterhost && server.repl_slave_ro &&
1571 !(c->flags & REDIS_MASTER) &&
1572 c->cmd->flags & REDIS_CMD_WRITE)
1573 {
1574 addReply(c, shared.roslaveerr);
1575 return REDIS_OK;
1576 }
1577
1578 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
1579 if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
1580 &&
1581 c->cmd->proc != subscribeCommand &&
1582 c->cmd->proc != unsubscribeCommand &&
1583 c->cmd->proc != psubscribeCommand &&
1584 c->cmd->proc != punsubscribeCommand) {
1585 addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
1586 return REDIS_OK;
1587 }
1588
1589 /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
1590 * we are a slave with a broken link with master. */
1591 if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
1592 server.repl_serve_stale_data == 0 &&
1593 c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
1594 {
1595 addReply(c, shared.masterdownerr);
1596 return REDIS_OK;
1597 }
1598
1599 /* Loading DB? Return an error if the command is not INFO */
1600 if (server.loading && c->cmd->proc != infoCommand) {
1601 addReply(c, shared.loadingerr);
1602 return REDIS_OK;
1603 }
1604
1605 /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. */
1606 if (server.lua_timedout &&
1607 !(c->cmd->proc == shutdownCommand &&
1608 c->argc == 2 &&
1609 tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
1610 !(c->cmd->proc == scriptCommand &&
1611 c->argc == 2 &&
1612 tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
1613 {
1614 addReply(c, shared.slowscripterr);
1615 return REDIS_OK;
1616 }
1617
1618 /* Exec the command */
1619 if (c->flags & REDIS_MULTI &&
1620 c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
1621 c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
1622 {
1623 queueMultiCommand(c);
1624 addReply(c,shared.queued);
1625 } else {
1626 call(c,REDIS_CALL_FULL);
1627 }
1628 return REDIS_OK;
1629 }
1630
1631 /*================================== Shutdown =============================== */
1632
1633 int prepareForShutdown(int flags) {
1634 int save = flags & REDIS_SHUTDOWN_SAVE;
1635 int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
1636
1637 redisLog(REDIS_WARNING,"User requested shutdown...");
1638 /* Kill the saving child if there is a background saving in progress.
1639 We want to avoid race conditions, for instance our saving child may
1640 overwrite the synchronous saving did by SHUTDOWN. */
1641 if (server.rdb_child_pid != -1) {
1642 redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
1643 kill(server.rdb_child_pid,SIGKILL);
1644 rdbRemoveTempFile(server.rdb_child_pid);
1645 }
1646 if (server.aof_state != REDIS_AOF_OFF) {
1647 /* Kill the AOF saving child as the AOF we already have may be longer
1648 * but contains the full dataset anyway. */
1649 if (server.aof_child_pid != -1) {
1650 redisLog(REDIS_WARNING,
1651 "There is a child rewriting the AOF. Killing it!");
1652 kill(server.aof_child_pid,SIGKILL);
1653 }
1654 /* Append only file: fsync() the AOF and exit */
1655 redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
1656 aof_fsync(server.aof_fd);
1657 }
1658 if ((server.saveparamslen > 0 && !nosave) || save) {
1659 redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
1660 /* Snapshotting. Perform a SYNC SAVE and exit */
1661 if (rdbSave(server.rdb_filename) != REDIS_OK) {
1662 /* Ooops.. error saving! The best we can do is to continue
1663 * operating. Note that if there was a background saving process,
1664 * in the next cron() Redis will be notified that the background
1665 * saving aborted, handling special stuff like slaves pending for
1666 * synchronization... */
1667 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
1668 return REDIS_ERR;
1669 }
1670 }
1671 if (server.daemonize) {
1672 redisLog(REDIS_NOTICE,"Removing the pid file.");
1673 unlink(server.pidfile);
1674 }
1675 /* Close the listening sockets. Apparently this allows faster restarts. */
1676 if (server.ipfd != -1) close(server.ipfd);
1677 if (server.sofd != -1) close(server.sofd);
1678 if (server.unixsocket) {
1679 redisLog(REDIS_NOTICE,"Removing the unix socket file.");
1680 unlink(server.unixsocket); /* don't care if this fails */
1681 }
1682
1683 redisLog(REDIS_WARNING,"Redis is now ready to exit, bye bye...");
1684 return REDIS_OK;
1685 }
1686
1687 /*================================== Commands =============================== */
1688
1689 void authCommand(redisClient *c) {
1690 if (!server.requirepass) {
1691 addReplyError(c,"Client sent AUTH, but no password is set");
1692 } else if (!strcmp(c->argv[1]->ptr, server.requirepass)) {
1693 c->authenticated = 1;
1694 addReply(c,shared.ok);
1695 } else {
1696 c->authenticated = 0;
1697 addReplyError(c,"invalid password");
1698 }
1699 }
1700
1701 void pingCommand(redisClient *c) {
1702 addReply(c,shared.pong);
1703 }
1704
1705 void echoCommand(redisClient *c) {
1706 addReplyBulk(c,c->argv[1]);
1707 }
1708
1709 void timeCommand(redisClient *c) {
1710 struct timeval tv;
1711
1712 /* gettimeofday() can only fail if &tv is a bad addresss so we
1713 * don't check for errors. */
1714 gettimeofday(&tv,NULL);
1715 addReplyMultiBulkLen(c,2);
1716 addReplyBulkLongLong(c,tv.tv_sec);
1717 addReplyBulkLongLong(c,tv.tv_usec);
1718 }
1719
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 4K, 100M, 2G, 1.5T, and so forth.
 *
 * 's' is the destination buffer and is always written (NUL terminated).
 * No size is passed, so the caller must provide enough room for the
 * longest output: 20 digits + 'B' + NUL for the plain-bytes form.
 * 'n' is the amount of bytes to format.
 *
 * Values below 1024 are printed as an integer followed by 'B'; larger
 * values are scaled by powers of 1024 and printed with two decimals and
 * the K/M/G/T/P suffix. Anything that does not fit the largest unit is
 * printed as plain bytes so that 's' never stays uninitialized. */
void bytesToHuman(char *s, unsigned long long n) {
    const unsigned long long kib = 1024ULL;
    const unsigned long long mib = kib*1024;
    const unsigned long long gib = mib*1024;
    const unsigned long long tib = gib*1024;
    const unsigned long long pib = tib*1024;
    const unsigned long long eib = pib*1024;

    if (n < kib) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < mib) {
        sprintf(s,"%.2fK",(double)n/kib);
    } else if (n < gib) {
        sprintf(s,"%.2fM",(double)n/mib);
    } else if (n < tib) {
        sprintf(s,"%.2fG",(double)n/gib);
    } else if (n < pib) {
        sprintf(s,"%.2fT",(double)n/tib);
    } else if (n < eib) {
        sprintf(s,"%.2fP",(double)n/pib);
    } else {
        /* Bigger than any unit we know about: fall back to raw bytes. */
        sprintf(s,"%lluB",n);
    }
}
1740
1741 /* Create the string returned by the INFO command. This is decoupled
1742 * by the INFO command itself as we need to report the same information
1743 * on memory corruption problems. */
1744 sds genRedisInfoString(char *section) {
1745 sds info = sdsempty();
1746 time_t uptime = server.unixtime-server.stat_starttime;
1747 int j, numcommands;
1748 struct rusage self_ru, c_ru;
1749 unsigned long lol, bib;
1750 int allsections = 0, defsections = 0;
1751 int sections = 0;
1752
1753 if (section) {
1754 allsections = strcasecmp(section,"all") == 0;
1755 defsections = strcasecmp(section,"default") == 0;
1756 }
1757
1758 getrusage(RUSAGE_SELF, &self_ru);
1759 getrusage(RUSAGE_CHILDREN, &c_ru);
1760 getClientsMaxBuffers(&lol,&bib);
1761
1762 /* Server */
1763 if (allsections || defsections || !strcasecmp(section,"server")) {
1764 struct utsname name;
1765
1766 if (sections++) info = sdscat(info,"\r\n");
1767 uname(&name);
1768 info = sdscatprintf(info,
1769 "# Server\r\n"
1770 "redis_version:%s\r\n"
1771 "redis_git_sha1:%s\r\n"
1772 "redis_git_dirty:%d\r\n"
1773 "os:%s %s %s\r\n"
1774 "arch_bits:%d\r\n"
1775 "multiplexing_api:%s\r\n"
1776 "gcc_version:%d.%d.%d\r\n"
1777 "process_id:%ld\r\n"
1778 "run_id:%s\r\n"
1779 "tcp_port:%d\r\n"
1780 "uptime_in_seconds:%ld\r\n"
1781 "uptime_in_days:%ld\r\n"
1782 "lru_clock:%ld\r\n",
1783 REDIS_VERSION,
1784 redisGitSHA1(),
1785 strtol(redisGitDirty(),NULL,10) > 0,
1786 name.sysname, name.release, name.machine,
1787 server.arch_bits,
1788 aeGetApiName(),
1789 #ifdef __GNUC__
1790 __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
1791 #else
1792 0,0,0,
1793 #endif
1794 (long) getpid(),
1795 server.runid,
1796 server.port,
1797 uptime,
1798 uptime/(3600*24),
1799 (unsigned long) server.lruclock);
1800 }
1801
1802 /* Clients */
1803 if (allsections || defsections || !strcasecmp(section,"clients")) {
1804 if (sections++) info = sdscat(info,"\r\n");
1805 info = sdscatprintf(info,
1806 "# Clients\r\n"
1807 "connected_clients:%lu\r\n"
1808 "client_longest_output_list:%lu\r\n"
1809 "client_biggest_input_buf:%lu\r\n"
1810 "blocked_clients:%d\r\n",
1811 listLength(server.clients)-listLength(server.slaves),
1812 lol, bib,
1813 server.bpop_blocked_clients);
1814 }
1815
1816 /* Memory */
1817 if (allsections || defsections || !strcasecmp(section,"memory")) {
1818 char hmem[64];
1819 char peak_hmem[64];
1820
1821 bytesToHuman(hmem,zmalloc_used_memory());
1822 bytesToHuman(peak_hmem,server.stat_peak_memory);
1823 if (sections++) info = sdscat(info,"\r\n");
1824 info = sdscatprintf(info,
1825 "# Memory\r\n"
1826 "used_memory:%zu\r\n"
1827 "used_memory_human:%s\r\n"
1828 "used_memory_rss:%zu\r\n"
1829 "used_memory_peak:%zu\r\n"
1830 "used_memory_peak_human:%s\r\n"
1831 "used_memory_lua:%lld\r\n"
1832 "mem_fragmentation_ratio:%.2f\r\n"
1833 "mem_allocator:%s\r\n",
1834 zmalloc_used_memory(),
1835 hmem,
1836 zmalloc_get_rss(),
1837 server.stat_peak_memory,
1838 peak_hmem,
1839 ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
1840 zmalloc_get_fragmentation_ratio(),
1841 ZMALLOC_LIB
1842 );
1843 }
1844
1845 /* Persistence */
1846 if (allsections || defsections || !strcasecmp(section,"persistence")) {
1847 if (sections++) info = sdscat(info,"\r\n");
1848 info = sdscatprintf(info,
1849 "# Persistence\r\n"
1850 "loading:%d\r\n"
1851 "aof_enabled:%d\r\n"
1852 "changes_since_last_save:%lld\r\n"
1853 "bgsave_in_progress:%d\r\n"
1854 "last_save_time:%ld\r\n"
1855 "last_bgsave_status:%s\r\n"
1856 "bgrewriteaof_in_progress:%d\r\n"
1857 "bgrewriteaof_scheduled:%d\r\n",
1858 server.loading,
1859 server.aof_state != REDIS_AOF_OFF,
1860 server.dirty,
1861 server.rdb_child_pid != -1,
1862 server.lastsave,
1863 server.lastbgsave_status == REDIS_OK ? "ok" : "err",
1864 server.aof_child_pid != -1,
1865 server.aof_rewrite_scheduled);
1866
1867 if (server.aof_state != REDIS_AOF_OFF) {
1868 info = sdscatprintf(info,
1869 "aof_current_size:%lld\r\n"
1870 "aof_base_size:%lld\r\n"
1871 "aof_pending_rewrite:%d\r\n"
1872 "aof_buffer_length:%zu\r\n"
1873 "aof_pending_bio_fsync:%llu\r\n"
1874 "aof_delayed_fsync:%lu\r\n",
1875 (long long) server.aof_current_size,
1876 (long long) server.aof_rewrite_base_size,
1877 server.aof_rewrite_scheduled,
1878 sdslen(server.aof_buf),
1879 bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
1880 server.aof_delayed_fsync);
1881 }
1882
1883 if (server.loading) {
1884 double perc;
1885 time_t eta, elapsed;
1886 off_t remaining_bytes = server.loading_total_bytes-
1887 server.loading_loaded_bytes;
1888
1889 perc = ((double)server.loading_loaded_bytes /
1890 server.loading_total_bytes) * 100;
1891
1892 elapsed = server.unixtime-server.loading_start_time;
1893 if (elapsed == 0) {
1894 eta = 1; /* A fake 1 second figure if we don't have
1895 enough info */
1896 } else {
1897 eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
1898 }
1899
1900 info = sdscatprintf(info,
1901 "loading_start_time:%ld\r\n"
1902 "loading_total_bytes:%llu\r\n"
1903 "loading_loaded_bytes:%llu\r\n"
1904 "loading_loaded_perc:%.2f\r\n"
1905 "loading_eta_seconds:%ld\r\n"
1906 ,(unsigned long) server.loading_start_time,
1907 (unsigned long long) server.loading_total_bytes,
1908 (unsigned long long) server.loading_loaded_bytes,
1909 perc,
1910 eta
1911 );
1912 }
1913 }
1914
1915 /* Stats */
1916 if (allsections || defsections || !strcasecmp(section,"stats")) {
1917 if (sections++) info = sdscat(info,"\r\n");
1918 info = sdscatprintf(info,
1919 "# Stats\r\n"
1920 "total_connections_received:%lld\r\n"
1921 "total_commands_processed:%lld\r\n"
1922 "instantaneous_ops_per_sec:%lld\r\n"
1923 "rejected_connections:%lld\r\n"
1924 "expired_keys:%lld\r\n"
1925 "evicted_keys:%lld\r\n"
1926 "keyspace_hits:%lld\r\n"
1927 "keyspace_misses:%lld\r\n"
1928 "pubsub_channels:%ld\r\n"
1929 "pubsub_patterns:%lu\r\n"
1930 "latest_fork_usec:%lld\r\n",
1931 server.stat_numconnections,
1932 server.stat_numcommands,
1933 getOperationsPerSecond(),
1934 server.stat_rejected_conn,
1935 server.stat_expiredkeys,
1936 server.stat_evictedkeys,
1937 server.stat_keyspace_hits,
1938 server.stat_keyspace_misses,
1939 dictSize(server.pubsub_channels),
1940 listLength(server.pubsub_patterns),
1941 server.stat_fork_time);
1942 }
1943
1944 /* Replication */
1945 if (allsections || defsections || !strcasecmp(section,"replication")) {
1946 if (sections++) info = sdscat(info,"\r\n");
1947 info = sdscatprintf(info,
1948 "# Replication\r\n"
1949 "role:%s\r\n",
1950 server.masterhost == NULL ? "master" : "slave");
1951 if (server.masterhost) {
1952 info = sdscatprintf(info,
1953 "master_host:%s\r\n"
1954 "master_port:%d\r\n"
1955 "master_link_status:%s\r\n"
1956 "master_last_io_seconds_ago:%d\r\n"
1957 "master_sync_in_progress:%d\r\n"
1958 ,server.masterhost,
1959 server.masterport,
1960 (server.repl_state == REDIS_REPL_CONNECTED) ?
1961 "up" : "down",
1962 server.master ?
1963 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
1964 server.repl_state == REDIS_REPL_TRANSFER
1965 );
1966
1967 if (server.repl_state == REDIS_REPL_TRANSFER) {
1968 info = sdscatprintf(info,
1969 "master_sync_left_bytes:%ld\r\n"
1970 "master_sync_last_io_seconds_ago:%d\r\n"
1971 ,(long)server.repl_transfer_left,
1972 (int)(server.unixtime-server.repl_transfer_lastio)
1973 );
1974 }
1975
1976 if (server.repl_state != REDIS_REPL_CONNECTED) {
1977 info = sdscatprintf(info,
1978 "master_link_down_since_seconds:%ld\r\n",
1979 (long)server.unixtime-server.repl_down_since);
1980 }
1981 }
1982 info = sdscatprintf(info,
1983 "connected_slaves:%lu\r\n",
1984 listLength(server.slaves));
1985 if (listLength(server.slaves)) {
1986 int slaveid = 0;
1987 listNode *ln;
1988 listIter li;
1989
1990 listRewind(server.slaves,&li);
1991 while((ln = listNext(&li))) {
1992 redisClient *slave = listNodeValue(ln);
1993 char *state = NULL;
1994 char ip[32];
1995 int port;
1996
1997 if (anetPeerToString(slave->fd,ip,&port) == -1) continue;
1998 switch(slave->replstate) {
1999 case REDIS_REPL_WAIT_BGSAVE_START:
2000 case REDIS_REPL_WAIT_BGSAVE_END:
2001 state = "wait_bgsave";
2002 break;
2003 case REDIS_REPL_SEND_BULK:
2004 state = "send_bulk";
2005 break;
2006 case REDIS_REPL_ONLINE:
2007 state = "online";
2008 break;
2009 }
2010 if (state == NULL) continue;
2011 info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n",
2012 slaveid,ip,port,state);
2013 slaveid++;
2014 }
2015 }
2016 }
2017
2018 /* CPU */
2019 if (allsections || defsections || !strcasecmp(section,"cpu")) {
2020 if (sections++) info = sdscat(info,"\r\n");
2021 info = sdscatprintf(info,
2022 "# CPU\r\n"
2023 "used_cpu_sys:%.2f\r\n"
2024 "used_cpu_user:%.2f\r\n"
2025 "used_cpu_sys_children:%.2f\r\n"
2026 "used_cpu_user_children:%.2f\r\n",
2027 (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
2028 (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
2029 (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
2030 (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
2031 }
2032
2033 /* cmdtime */
2034 if (allsections || !strcasecmp(section,"commandstats")) {
2035 if (sections++) info = sdscat(info,"\r\n");
2036 info = sdscatprintf(info, "# Commandstats\r\n");
2037 numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
2038 for (j = 0; j < numcommands; j++) {
2039 struct redisCommand *c = redisCommandTable+j;
2040
2041 if (!c->calls) continue;
2042 info = sdscatprintf(info,
2043 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
2044 c->name, c->calls, c->microseconds,
2045 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
2046 }
2047 }
2048
2049 /* Cluster */
2050 if (allsections || defsections || !strcasecmp(section,"cluster")) {
2051 if (sections++) info = sdscat(info,"\r\n");
2052 info = sdscatprintf(info,
2053 "# Cluster\r\n"
2054 "cluster_enabled:%d\r\n",
2055 server.cluster_enabled);
2056 }
2057
2058 /* Key space */
2059 if (allsections || defsections || !strcasecmp(section,"keyspace")) {
2060 if (sections++) info = sdscat(info,"\r\n");
2061 info = sdscatprintf(info, "# Keyspace\r\n");
2062 for (j = 0; j < server.dbnum; j++) {
2063 long long keys, vkeys;
2064
2065 keys = dictSize(server.db[j].dict);
2066 vkeys = dictSize(server.db[j].expires);
2067 if (keys || vkeys) {
2068 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
2069 j, keys, vkeys);
2070 }
2071 }
2072 }
2073 return info;
2074 }
2075
2076 void infoCommand(redisClient *c) {
2077 char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
2078
2079 if (c->argc > 2) {
2080 addReply(c,shared.syntaxerr);
2081 return;
2082 }
2083 sds info = genRedisInfoString(section);
2084 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
2085 (unsigned long)sdslen(info)));
2086 addReplySds(c,info);
2087 addReply(c,shared.crlf);
2088 }
2089
2090 void monitorCommand(redisClient *c) {
2091 /* ignore MONITOR if aleady slave or in monitor mode */
2092 if (c->flags & REDIS_SLAVE) return;
2093
2094 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
2095 c->slaveseldb = 0;
2096 listAddNodeTail(server.monitors,c);
2097 addReply(c,shared.ok);
2098 }
2099
2100 /* ============================ Maxmemory directive ======================== */
2101
2102 /* This function gets called when 'maxmemory' is set on the config file to limit
2103 * the max memory used by the server, before processing a command.
2104 *
2105 * The goal of the function is to free enough memory to keep Redis under the
2106 * configured memory limit.
2107 *
2108 * The function starts calculating how many bytes should be freed to keep
2109 * Redis under the limit, and enters a loop selecting the best keys to
2110 * evict accordingly to the configured policy.
2111 *
2112 * If all the bytes needed to return back under the limit were freed the
2113 * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
2114 * should block the execution of commands that will result in more memory
2115 * used by the server.
2116 */
2117 int freeMemoryIfNeeded(void) {
2118 size_t mem_used, mem_tofree, mem_freed;
2119 int slaves = listLength(server.slaves);
2120
2121 /* Remove the size of slaves output buffers and AOF buffer from the
2122 * count of used memory. */
2123 mem_used = zmalloc_used_memory();
2124 if (slaves) {
2125 listIter li;
2126 listNode *ln;
2127
2128 listRewind(server.slaves,&li);
2129 while((ln = listNext(&li))) {
2130 redisClient *slave = listNodeValue(ln);
2131 unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
2132 if (obuf_bytes > mem_used)
2133 mem_used = 0;
2134 else
2135 mem_used -= obuf_bytes;
2136 }
2137 }
2138 if (server.aof_state != REDIS_AOF_OFF) {
2139 mem_used -= sdslen(server.aof_buf);
2140 mem_used -= sdslen(server.aof_rewrite_buf);
2141 }
2142
2143 /* Check if we are over the memory limit. */
2144 if (mem_used <= server.maxmemory) return REDIS_OK;
2145
2146 if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
2147 return REDIS_ERR; /* We need to free memory, but policy forbids. */
2148
2149 /* Compute how much memory we need to free. */
2150 mem_tofree = mem_used - server.maxmemory;
2151 mem_freed = 0;
2152 while (mem_freed < mem_tofree) {
2153 int j, k, keys_freed = 0;
2154
2155 for (j = 0; j < server.dbnum; j++) {
2156 long bestval = 0; /* just to prevent warning */
2157 sds bestkey = NULL;
2158 struct dictEntry *de;
2159 redisDb *db = server.db+j;
2160 dict *dict;
2161
2162 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2163 server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
2164 {
2165 dict = server.db[j].dict;
2166 } else {
2167 dict = server.db[j].expires;
2168 }
2169 if (dictSize(dict) == 0) continue;
2170
2171 /* volatile-random and allkeys-random policy */
2172 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
2173 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
2174 {
2175 de = dictGetRandomKey(dict);
2176 bestkey = dictGetKey(de);
2177 }
2178
2179 /* volatile-lru and allkeys-lru policy */
2180 else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2181 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2182 {
2183 for (k = 0; k < server.maxmemory_samples; k++) {
2184 sds thiskey;
2185 long thisval;
2186 robj *o;
2187
2188 de = dictGetRandomKey(dict);
2189 thiskey = dictGetKey(de);
2190 /* When policy is volatile-lru we need an additonal lookup
2191 * to locate the real key, as dict is set to db->expires. */
2192 if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2193 de = dictFind(db->dict, thiskey);
2194 o = dictGetVal(de);
2195 thisval = estimateObjectIdleTime(o);
2196
2197 /* Higher idle time is better candidate for deletion */
2198 if (bestkey == NULL || thisval > bestval) {
2199 bestkey = thiskey;
2200 bestval = thisval;
2201 }
2202 }
2203 }
2204
2205 /* volatile-ttl */
2206 else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
2207 for (k = 0; k < server.maxmemory_samples; k++) {
2208 sds thiskey;
2209 long thisval;
2210
2211 de = dictGetRandomKey(dict);
2212 thiskey = dictGetKey(de);
2213 thisval = (long) dictGetVal(de);
2214
2215 /* Expire sooner (minor expire unix timestamp) is better
2216 * candidate for deletion */
2217 if (bestkey == NULL || thisval < bestval) {
2218 bestkey = thiskey;
2219 bestval = thisval;
2220 }
2221 }
2222 }
2223
2224 /* Finally remove the selected key. */
2225 if (bestkey) {
2226 long long delta;
2227
2228 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
2229 propagateExpire(db,keyobj);
2230 /* We compute the amount of memory freed by dbDelete() alone.
2231 * It is possible that actually the memory needed to propagate
2232 * the DEL in AOF and replication link is greater than the one
2233 * we are freeing removing the key, but we can't account for
2234 * that otherwise we would never exit the loop.
2235 *
2236 * AOF and Output buffer memory will be freed eventually so
2237 * we only care about memory used by the key space. */
2238 delta = (long long) zmalloc_used_memory();
2239 dbDelete(db,keyobj);
2240 delta -= (long long) zmalloc_used_memory();
2241 mem_freed += delta;
2242 server.stat_evictedkeys++;
2243 decrRefCount(keyobj);
2244 keys_freed++;
2245
2246 /* When the memory to free starts to be big enough, we may
2247 * start spending so much time here that is impossible to
2248 * deliver data to the slaves fast enough, so we force the
2249 * transmission here inside the loop. */
2250 if (slaves) flushSlavesOutputBuffers();
2251 }
2252 }
2253 if (!keys_freed) return REDIS_ERR; /* nothing to free... */
2254 }
2255 return REDIS_OK;
2256 }
2257
2258 /* =================================== Main! ================================ */
2259
2260 #ifdef __linux__
/* Return the integer found on the first line of
 * /proc/sys/vm/overcommit_memory, or -1 if the file can't be opened or
 * read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    char *got;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;

    /* Read first, then close once, whatever the outcome of the read. */
    got = fgets(line,sizeof(line),procfile);
    fclose(procfile);

    return (got == NULL) ? -1 : atoi(line);
}
2274
2275 void linuxOvercommitMemoryWarning(void) {
2276 if (linuxOvercommitMemoryValue() == 0) {
2277 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
2278 }
2279 }
2280 #endif /* __linux__ */
2281
2282 void createPidFile(void) {
2283 /* Try to write the pid file in a best-effort way. */
2284 FILE *fp = fopen(server.pidfile,"w");
2285 if (fp) {
2286 fprintf(fp,"%d\n",(int)getpid());
2287 fclose(fp);
2288 }
2289 }
2290
/* Detach from the terminal: fork (only the process seeing a zero return
 * from fork() goes on), start a new session and point the three standard
 * descriptors to /dev/null. */
void daemonize(void) {
    int nullfd;

    if (fork() != 0) exit(0); /* parent exits */
    setsid(); /* create a new session */

    /* Every output goes to /dev/null. If Redis is daemonized but
     * the 'logfile' is set to 'stdout' in the configuration file
     * it will not log at all. */
    nullfd = open("/dev/null", O_RDWR, 0);
    if (nullfd == -1) return;

    dup2(nullfd, STDIN_FILENO);
    dup2(nullfd, STDOUT_FILENO);
    dup2(nullfd, STDERR_FILENO);

    /* Don't close it if it happens to be one of the standard fds. */
    if (nullfd > STDERR_FILENO) close(nullfd);
}
2307
2308 void version() {
2309 printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d\n",
2310 REDIS_VERSION,
2311 redisGitSHA1(),
2312 atoi(redisGitDirty()) > 0,
2313 ZMALLOC_LIB,
2314 sizeof(long) == 4 ? 32 : 64);
2315 exit(0);
2316 }
2317
/* Print the command line help on standard error and exit with status 1. */
void usage() {
    /* One entry per output line; none of them contains a '%', so they
     * can be written verbatim. */
    static const char *const help_lines[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n"
    };
    size_t i;

    for (i = 0; i < sizeof(help_lines)/sizeof(help_lines[0]); i++)
        fputs(help_lines[i],stderr);
    exit(1);
}
2332
2333 void redisAsciiArt(void) {
2334 #include "asciilogo.h"
2335 char *buf = zmalloc(1024*16);
2336
2337 snprintf(buf,1024*16,ascii_logo,
2338 REDIS_VERSION,
2339 redisGitSHA1(),
2340 strtol(redisGitDirty(),NULL,10) > 0,
2341 (sizeof(long) == 8) ? "64" : "32",
2342 server.cluster_enabled ? "cluster" : "stand alone",
2343 server.port,
2344 (long) getpid()
2345 );
2346 redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
2347 zfree(buf);
2348 }
2349
2350 static void sigtermHandler(int sig) {
2351 REDIS_NOTUSED(sig);
2352
2353 redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown...");
2354 server.shutdown_asap = 1;
2355 }
2356
2357 void setupSignalHandlers(void) {
2358 struct sigaction act;
2359
2360 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
2361 * Otherwise, sa_handler is used. */
2362 sigemptyset(&act.sa_mask);
2363 act.sa_flags = 0;
2364 act.sa_handler = sigtermHandler;
2365 sigaction(SIGTERM, &act, NULL);
2366
2367 #ifdef HAVE_BACKTRACE
2368 sigemptyset(&act.sa_mask);
2369 act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
2370 act.sa_sigaction = sigsegvHandler;
2371 sigaction(SIGSEGV, &act, NULL);
2372 sigaction(SIGBUS, &act, NULL);
2373 sigaction(SIGFPE, &act, NULL);
2374 sigaction(SIGILL, &act, NULL);
2375 #endif
2376 return;
2377 }
2378
2379 void memtest(size_t megabytes, int passes);
2380
2381 int main(int argc, char **argv) {
2382 long long start;
2383 struct timeval tv;
2384
2385 /* We need to initialize our libraries, and the server configuration. */
2386 zmalloc_enable_thread_safeness();
2387 srand(time(NULL)^getpid());
2388 gettimeofday(&tv,NULL);
2389 dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
2390 initServerConfig();
2391
2392 if (argc >= 2) {
2393 int j = 1; /* First option to parse in argv[] */
2394 sds options = sdsempty();
2395 char *configfile = NULL;
2396
2397 /* Handle special options --help and --version */
2398 if (strcmp(argv[1], "-v") == 0 ||
2399 strcmp(argv[1], "--version") == 0) version();
2400 if (strcmp(argv[1], "--help") == 0 ||
2401 strcmp(argv[1], "-h") == 0) usage();
2402 if (strcmp(argv[1], "--test-memory") == 0) {
2403 if (argc == 3) {
2404 memtest(atoi(argv[2]),50);
2405 exit(0);
2406 } else {
2407 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
2408 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
2409 exit(1);
2410 }
2411 }
2412
2413 /* First argument is the config file name? */
2414 if (argv[j][0] != '-' || argv[j][1] != '-')
2415 configfile = argv[j++];
2416 /* All the other options are parsed and conceptually appended to the
2417 * configuration file. For instance --port 6380 will generate the
2418 * string "port 6380\n" to be parsed after the actual file name
2419 * is parsed, if any. */
2420 while(j != argc) {
2421 if (argv[j][0] == '-' && argv[j][1] == '-') {
2422 /* Option name */
2423 if (sdslen(options)) options = sdscat(options,"\n");
2424 options = sdscat(options,argv[j]+2);
2425 options = sdscat(options," ");
2426 } else {
2427 /* Option argument */
2428 options = sdscatrepr(options,argv[j],strlen(argv[j]));
2429 options = sdscat(options," ");
2430 }
2431 j++;
2432 }
2433 resetServerSaveParams();
2434 loadServerConfig(configfile,options);
2435 sdsfree(options);
2436 } else {
2437 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
2438 }
2439 if (server.daemonize) daemonize();
2440 initServer();
2441 if (server.daemonize) createPidFile();
2442 redisAsciiArt();
2443 redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
2444 #ifdef __linux__
2445 linuxOvercommitMemoryWarning();
2446 #endif
2447 start = ustime();
2448 if (server.aof_state == REDIS_AOF_ON) {
2449 if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
2450 redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
2451 } else {
2452 if (rdbLoad(server.rdb_filename) == REDIS_OK) {
2453 redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
2454 (float)(ustime()-start)/1000000);
2455 } else if (errno != ENOENT) {
2456 redisLog(REDIS_WARNING,"Fatal error loading the DB. Exiting.");
2457 exit(1);
2458 }
2459 }
2460 if (server.ipfd > 0)
2461 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
2462 if (server.sofd > 0)
2463 redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
2464 aeSetBeforeSleepProc(server.el,beforeSleep);
2465 aeMain(server.el);
2466 aeDeleteEventLoop(server.el);
2467 return 0;
2468 }
2469
2470 /* The End */