]> git.saurik.com Git - redis.git/blob - src/redis.c
Redis timer interrupt frequency configurable as REDIS_HZ.
[redis.git] / src / redis.c
1 /*
2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "redis.h"
31 #include "slowlog.h"
32 #include "bio.h"
33
34 #include <time.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <ctype.h>
40 #include <stdarg.h>
41 #include <arpa/inet.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <sys/time.h>
45 #include <sys/resource.h>
46 #include <sys/uio.h>
47 #include <limits.h>
48 #include <float.h>
49 #include <math.h>
50 #include <sys/resource.h>
51 #include <sys/utsname.h>
52
53 /* Our shared "common" objects */
54
55 struct sharedObjectsStruct shared;
56
57 /* Global vars that are actually used as constants. The following double
58 * values are used for double on-disk serialization, and are initialized
59 * at runtime to avoid strange compiler optimizations. */
60
61 double R_Zero, R_PosInf, R_NegInf, R_Nan;
62
63 /*================================= Globals ================================= */
64
65 /* Global vars */
66 struct redisServer server; /* server global state */
67 struct redisCommand *commandTable;
68
69 /* Our command table.
70 *
71 * Every entry is composed of the following fields:
72 *
73 * name: a string representing the command name.
74 * function: pointer to the C function implementing the command.
75 * arity: number of arguments, it is possible to use -N to say >= N
76 * sflags: command flags as string. See below for a table of flags.
77 * flags: flags as bitmask. Computed by Redis using the 'sflags' field.
78 * get_keys_proc: an optional function to get key arguments from a command.
79 * This is only used when the following three fields are not
80 * enough to specify what arguments are keys.
81 * first_key_index: first argument that is a key
82 * last_key_index: last argument that is a key
83 * key_step: step to get all the keys from first to last argument. For instance
84 * in MSET the step is two since arguments are key,val,key,val,...
85 * microseconds: microseconds of total execution time for this command.
86 * calls: total number of calls of this command.
87 *
88 * The flags, microseconds and calls fields are computed by Redis and should
89 * always be set to zero.
90 *
91 * Command flags are expressed using strings where every character represents
92 * a flag. Later the populateCommandTable() function will take care of
93 * populating the real 'flags' field using this characters.
94 *
95 * This is the meaning of the flags:
96 *
97 * w: write command (may modify the key space).
98 * r: read command (will never modify the key space).
99 * m: may increase memory usage once called. Don't allow if out of memory.
100 * a: admin command, like SAVE or SHUTDOWN.
101 * p: Pub/Sub related command.
102 * f: force replication of this command, regarless of server.dirty.
103 * s: command not allowed in scripts.
104 * R: random command. Command is not deterministic, that is, the same command
105 * with the same arguments, with the same key space, may have different
106 * results. For instance SPOP and RANDOMKEY are two random commands.
107 * S: Sort command output array if called from script, so that the output
108 * is deterministic.
109 */
110 struct redisCommand redisCommandTable[] = {
111 {"get",getCommand,2,"r",0,NULL,1,1,1,0,0},
112 {"set",setCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
113 {"setnx",setnxCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
114 {"setex",setexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
115 {"psetex",psetexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
116 {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
117 {"strlen",strlenCommand,2,"r",0,NULL,1,1,1,0,0},
118 {"del",delCommand,-2,"w",0,noPreloadGetKeys,1,-1,1,0,0},
119 {"exists",existsCommand,2,"r",0,NULL,1,1,1,0,0},
120 {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
121 {"getbit",getbitCommand,3,"r",0,NULL,1,1,1,0,0},
122 {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
123 {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
124 {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
125 {"incr",incrCommand,2,"wm",0,NULL,1,1,1,0,0},
126 {"decr",decrCommand,2,"wm",0,NULL,1,1,1,0,0},
127 {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
128 {"rpush",rpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
129 {"lpush",lpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
130 {"rpushx",rpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
131 {"lpushx",lpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
132 {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
133 {"rpop",rpopCommand,2,"w",0,NULL,1,1,1,0,0},
134 {"lpop",lpopCommand,2,"w",0,NULL,1,1,1,0,0},
135 {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
136 {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
137 {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
138 {"llen",llenCommand,2,"r",0,NULL,1,1,1,0,0},
139 {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
140 {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
141 {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
142 {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
143 {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
144 {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
145 {"sadd",saddCommand,-3,"wm",0,NULL,1,1,1,0,0},
146 {"srem",sremCommand,-3,"w",0,NULL,1,1,1,0,0},
147 {"smove",smoveCommand,4,"w",0,NULL,1,2,1,0,0},
148 {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
149 {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
150 {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
151 {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
152 {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
153 {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
154 {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
155 {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
156 {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
157 {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
158 {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
159 {"zadd",zaddCommand,-4,"wm",0,NULL,1,1,1,0,0},
160 {"zincrby",zincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
161 {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0},
162 {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
163 {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
164 {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
165 {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
166 {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
167 {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
168 {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
169 {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0},
170 {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
171 {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0},
172 {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0},
173 {"zrank",zrankCommand,3,"r",0,NULL,1,1,1,0,0},
174 {"zrevrank",zrevrankCommand,3,"r",0,NULL,1,1,1,0,0},
175 {"hset",hsetCommand,4,"wm",0,NULL,1,1,1,0,0},
176 {"hsetnx",hsetnxCommand,4,"wm",0,NULL,1,1,1,0,0},
177 {"hget",hgetCommand,3,"r",0,NULL,1,1,1,0,0},
178 {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
179 {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
180 {"hincrby",hincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
181 {"hincrbyfloat",hincrbyfloatCommand,4,"wm",0,NULL,1,1,1,0,0},
182 {"hdel",hdelCommand,-3,"w",0,NULL,1,1,1,0,0},
183 {"hlen",hlenCommand,2,"r",0,NULL,1,1,1,0,0},
184 {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
185 {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
186 {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
187 {"hexists",hexistsCommand,3,"r",0,NULL,1,1,1,0,0},
188 {"incrby",incrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
189 {"decrby",decrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
190 {"incrbyfloat",incrbyfloatCommand,3,"wm",0,NULL,1,1,1,0,0},
191 {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
192 {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
193 {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
194 {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
195 {"select",selectCommand,2,"r",0,NULL,0,0,0,0,0},
196 {"move",moveCommand,3,"w",0,NULL,1,1,1,0,0},
197 {"rename",renameCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
198 {"renamenx",renamenxCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
199 {"expire",expireCommand,3,"w",0,NULL,1,1,1,0,0},
200 {"expireat",expireatCommand,3,"w",0,NULL,1,1,1,0,0},
201 {"pexpire",pexpireCommand,3,"w",0,NULL,1,1,1,0,0},
202 {"pexpireat",pexpireatCommand,3,"w",0,NULL,1,1,1,0,0},
203 {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
204 {"dbsize",dbsizeCommand,1,"r",0,NULL,0,0,0,0,0},
205 {"auth",authCommand,2,"rs",0,NULL,0,0,0,0,0},
206 {"ping",pingCommand,1,"r",0,NULL,0,0,0,0,0},
207 {"echo",echoCommand,2,"r",0,NULL,0,0,0,0,0},
208 {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
209 {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
210 {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
211 {"shutdown",shutdownCommand,-1,"ar",0,NULL,0,0,0,0,0},
212 {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0},
213 {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0},
214 {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0},
215 {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0},
216 {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
217 {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
218 {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
219 {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
220 {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0},
221 {"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0},
222 {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
223 {"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0},
224 {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0},
225 {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0},
226 {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0},
227 {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
228 {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0},
229 {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
230 {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
231 {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
232 {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
233 {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0},
234 {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0},
235 {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0},
236 {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0},
237 {"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0},
238 {"migrate",migrateCommand,6,"aw",0,NULL,0,0,0,0,0},
239 {"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0},
240 {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
241 {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0},
242 {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0},
243 {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
244 {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
245 {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
246 {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
247 {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0}
248 };
249
250 /*============================ Utility functions ============================ */
251
252 /* Low level logging. To use only for very big messages, otherwise
253 * redisLog() is to prefer. */
254 void redisLogRaw(int level, const char *msg) {
255 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
256 const char *c = ".-*#";
257 FILE *fp;
258 char buf[64];
259 int rawmode = (level & REDIS_LOG_RAW);
260
261 level &= 0xff; /* clear flags */
262 if (level < server.verbosity) return;
263
264 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
265 if (!fp) return;
266
267 if (rawmode) {
268 fprintf(fp,"%s",msg);
269 } else {
270 int off;
271 struct timeval tv;
272
273 gettimeofday(&tv,NULL);
274 off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
275 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
276 fprintf(fp,"[%d] %s %c %s\n",(int)getpid(),buf,c[level],msg);
277 }
278 fflush(fp);
279
280 if (server.logfile) fclose(fp);
281
282 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
283 }
284
285 /* Like redisLogRaw() but with printf-alike support. This is the funciton that
286 * is used across the code. The raw version is only used in order to dump
287 * the INFO output on crash. */
288 void redisLog(int level, const char *fmt, ...) {
289 va_list ap;
290 char msg[REDIS_MAX_LOGMSG_LEN];
291
292 if ((level&0xff) < server.verbosity) return;
293
294 va_start(ap, fmt);
295 vsnprintf(msg, sizeof(msg), fmt, ap);
296 va_end(ap);
297
298 redisLogRaw(level,msg);
299 }
300
301 /* Log a fixed message without printf-alike capabilities, in a way that is
302 * safe to call from a signal handler.
303 *
304 * We actually use this only for signals that are not fatal from the point
305 * of view of Redis. Signals that are going to kill the server anyway and
306 * where we need printf-alike features are served by redisLog(). */
307 void redisLogFromHandler(int level, const char *msg) {
308 int fd;
309 char buf[64];
310
311 if ((level&0xff) < server.verbosity ||
312 (server.logfile == NULL && server.daemonize)) return;
313 fd = server.logfile ?
314 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644) :
315 STDOUT_FILENO;
316 if (fd == -1) return;
317 ll2string(buf,sizeof(buf),getpid());
318 if (write(fd,"[",1) == -1) goto err;
319 if (write(fd,buf,strlen(buf)) == -1) goto err;
320 if (write(fd," | signal handler] (",20) == -1) goto err;
321 ll2string(buf,sizeof(buf),time(NULL));
322 if (write(fd,buf,strlen(buf)) == -1) goto err;
323 if (write(fd,") ",2) == -1) goto err;
324 if (write(fd,msg,strlen(msg)) == -1) goto err;
325 if (write(fd,"\n",1) == -1) goto err;
326 err:
327 if (server.logfile) close(fd);
328 }
329
330 /* Redis generally does not try to recover from out of memory conditions
331 * when allocating objects or strings, it is not clear if it will be possible
332 * to report this condition to the client since the networking layer itself
333 * is based on heap allocation for send buffers, so we simply abort.
334 * At least the code will be simpler to read... */
335 void oom(const char *msg) {
336 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
337 sleep(1);
338 abort();
339 }
340
341 /* Return the UNIX time in microseconds */
342 long long ustime(void) {
343 struct timeval tv;
344 long long ust;
345
346 gettimeofday(&tv, NULL);
347 ust = ((long long)tv.tv_sec)*1000000;
348 ust += tv.tv_usec;
349 return ust;
350 }
351
/* Current UNIX time expressed in milliseconds, derived from ustime(). */
long long mstime(void) {
    long long usec = ustime();

    return usec/1000;
}
356
/* Terminate a child process after an RDB dump or an AOF rewrite.
 *
 * _exit() is used instead of exit() because the latter may interact with the
 * same file objects used by the parent process. Builds with COVERAGE_TEST
 * defined use the normal exit() so that the right coverage information is
 * obtained. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
368
369 /*====================== Hash table type implementation ==================== */
370
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and Redis objects as values (objects can hold SDS strings,
 * lists, sets). */
374
/* Dict destructor callback releasing 'val' with zfree().
 * 'privdata' is not used. */
void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    zfree(val);
}
380
381 void dictListDestructor(void *privdata, void *val)
382 {
383 DICT_NOTUSED(privdata);
384 listRelease((list*)val);
385 }
386
387 int dictSdsKeyCompare(void *privdata, const void *key1,
388 const void *key2)
389 {
390 int l1,l2;
391 DICT_NOTUSED(privdata);
392
393 l1 = sdslen((sds)key1);
394 l2 = sdslen((sds)key2);
395 if (l1 != l2) return 0;
396 return memcmp(key1, key2, l1) == 0;
397 }
398
/* Case insensitive key comparison, used for the command lookup table.
 * Returns 1 when strcasecmp() reports the keys as equal, 0 otherwise.
 * 'privdata' is not used. */
int dictSdsKeyCaseCompare(void *privdata, const void *key1,
        const void *key2)
{
    int diff;
    DICT_NOTUSED(privdata);

    diff = strcasecmp(key1, key2);
    return diff == 0;
}
407
/* Dict destructor callback for Redis objects: drops one reference from
 * 'val'. NULL values are accepted and ignored (values of swapped out keys
 * are set to NULL). 'privdata' is not used. */
void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
415
416 void dictSdsDestructor(void *privdata, void *val)
417 {
418 DICT_NOTUSED(privdata);
419
420 sdsfree(val);
421 }
422
423 int dictObjKeyCompare(void *privdata, const void *key1,
424 const void *key2)
425 {
426 const robj *o1 = key1, *o2 = key2;
427 return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
428 }
429
430 unsigned int dictObjHash(const void *key) {
431 const robj *o = key;
432 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
433 }
434
/* Dict hash function for sds keys (case sensitive). */
unsigned int dictSdsHash(const void *key) {
    int len = sdslen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
438
/* Dict hash function for sds keys built on dictGenCaseHashFunction(); it is
 * the hash paired with dictSdsKeyCaseCompare() in the command table type. */
unsigned int dictSdsCaseHash(const void *key) {
    int len = sdslen((char*)key);

    return dictGenCaseHashFunction((unsigned char*)key, len);
}
442
443 int dictEncObjKeyCompare(void *privdata, const void *key1,
444 const void *key2)
445 {
446 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
447 int cmp;
448
449 if (o1->encoding == REDIS_ENCODING_INT &&
450 o2->encoding == REDIS_ENCODING_INT)
451 return o1->ptr == o2->ptr;
452
453 o1 = getDecodedObject(o1);
454 o2 = getDecodedObject(o2);
455 cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
456 decrRefCount(o1);
457 decrRefCount(o2);
458 return cmp;
459 }
460
461 unsigned int dictEncObjHash(const void *key) {
462 robj *o = (robj*) key;
463
464 if (o->encoding == REDIS_ENCODING_RAW) {
465 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
466 } else {
467 if (o->encoding == REDIS_ENCODING_INT) {
468 char buf[32];
469 int len;
470
471 len = ll2string(buf,32,(long)o->ptr);
472 return dictGenHashFunction((unsigned char*)buf, len);
473 } else {
474 unsigned int hash;
475
476 o = getDecodedObject(o);
477 hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
478 decrRefCount(o);
479 return hash;
480 }
481 }
482 }
483
484 /* Sets type hash table */
485 dictType setDictType = {
486 dictEncObjHash, /* hash function */
487 NULL, /* key dup */
488 NULL, /* val dup */
489 dictEncObjKeyCompare, /* key compare */
490 dictRedisObjectDestructor, /* key destructor */
491 NULL /* val destructor */
492 };
493
494 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
495 dictType zsetDictType = {
496 dictEncObjHash, /* hash function */
497 NULL, /* key dup */
498 NULL, /* val dup */
499 dictEncObjKeyCompare, /* key compare */
500 dictRedisObjectDestructor, /* key destructor */
501 NULL /* val destructor */
502 };
503
504 /* Db->dict, keys are sds strings, vals are Redis objects. */
505 dictType dbDictType = {
506 dictSdsHash, /* hash function */
507 NULL, /* key dup */
508 NULL, /* val dup */
509 dictSdsKeyCompare, /* key compare */
510 dictSdsDestructor, /* key destructor */
511 dictRedisObjectDestructor /* val destructor */
512 };
513
514 /* Db->expires */
515 dictType keyptrDictType = {
516 dictSdsHash, /* hash function */
517 NULL, /* key dup */
518 NULL, /* val dup */
519 dictSdsKeyCompare, /* key compare */
520 NULL, /* key destructor */
521 NULL /* val destructor */
522 };
523
524 /* Command table. sds string -> command struct pointer. */
525 dictType commandTableDictType = {
526 dictSdsCaseHash, /* hash function */
527 NULL, /* key dup */
528 NULL, /* val dup */
529 dictSdsKeyCaseCompare, /* key compare */
530 dictSdsDestructor, /* key destructor */
531 NULL /* val destructor */
532 };
533
534 /* Hash type hash table (note that small hashes are represented with zimpaps) */
535 dictType hashDictType = {
536 dictEncObjHash, /* hash function */
537 NULL, /* key dup */
538 NULL, /* val dup */
539 dictEncObjKeyCompare, /* key compare */
540 dictRedisObjectDestructor, /* key destructor */
541 dictRedisObjectDestructor /* val destructor */
542 };
543
544 /* Keylist hash table type has unencoded redis objects as keys and
545 * lists as values. It's used for blocking operations (BLPOP) and to
546 * map swapped keys to a list of clients waiting for this keys to be loaded. */
547 dictType keylistDictType = {
548 dictObjHash, /* hash function */
549 NULL, /* key dup */
550 NULL, /* val dup */
551 dictObjKeyCompare, /* key compare */
552 dictRedisObjectDestructor, /* key destructor */
553 dictListDestructor /* val destructor */
554 };
555
556 /* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to
557 * clusterNode structures. */
558 dictType clusterNodesDictType = {
559 dictSdsHash, /* hash function */
560 NULL, /* key dup */
561 NULL, /* val dup */
562 dictSdsKeyCompare, /* key compare */
563 dictSdsDestructor, /* key destructor */
564 NULL /* val destructor */
565 };
566
567 int htNeedsResize(dict *dict) {
568 long long size, used;
569
570 size = dictSlots(dict);
571 used = dictSize(dict);
572 return (size && used && size > DICT_HT_INITIAL_SIZE &&
573 (used*100/size < REDIS_HT_MINFILL));
574 }
575
576 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
577 * we resize the hash table to save memory */
578 void tryResizeHashTables(void) {
579 int j;
580
581 for (j = 0; j < server.dbnum; j++) {
582 if (htNeedsResize(server.db[j].dict))
583 dictResize(server.db[j].dict);
584 if (htNeedsResize(server.db[j].expires))
585 dictResize(server.db[j].expires);
586 }
587 }
588
589 /* Our hash table implementation performs rehashing incrementally while
590 * we write/read from the hash table. Still if the server is idle, the hash
591 * table will use two tables for a long time. So we try to use 1 millisecond
592 * of CPU time at every serverCron() loop in order to rehash some key. */
593 void incrementallyRehash(void) {
594 int j;
595
596 for (j = 0; j < server.dbnum; j++) {
597 if (dictIsRehashing(server.db[j].dict)) {
598 dictRehashMilliseconds(server.db[j].dict,1);
599 break; /* already used our millisecond for this loop... */
600 }
601 }
602 }
603
604 /* This function is called once a background process of some kind terminates,
605 * as we want to avoid resizing the hash tables when there is a child in order
606 * to play well with copy-on-write (otherwise when a resize happens lots of
607 * memory pages are copied). The goal of this function is to update the ability
608 * for dict.c to resize the hash tables accordingly to the fact we have o not
609 * running childs. */
610 void updateDictResizePolicy(void) {
611 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
612 dictEnableResize();
613 else
614 dictDisableResize();
615 }
616
/* ================ Cron: called REDIS_HZ times per second ================= */
618
619 /* Try to expire a few timed out keys. The algorithm used is adaptive and
620 * will use few CPU cycles if there are few expiring keys, otherwise
621 * it will get more aggressive to avoid that too much memory is used by
622 * keys that can be removed from the keyspace. */
623 void activeExpireCycle(void) {
624 int j;
625 long long start = mstime(), timelimit;
626
627 /* We can use at max REDIS_EXPIRELOOKUPS_TIME_PERC percentage of CPU time
628 * per iteration. Since this function gets called with a frequency of
629 * REDIS_HZ times per second, the following is the max amount of
630 * milliseconds we can spend here: */
631 timelimit = (1000/REDIS_HZ/100)*REDIS_EXPIRELOOKUPS_TIME_PERC;
632 if (timelimit <= 0) timelimit = 1;
633
634 for (j = 0; j < server.dbnum; j++) {
635 int expired, iteration = 0;
636 redisDb *db = server.db+j;
637
638 /* Continue to expire if at the end of the cycle more than 25%
639 * of the keys were expired. */
640 do {
641 long num = dictSize(db->expires);
642 long long now = mstime();
643
644 expired = 0;
645 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
646 num = REDIS_EXPIRELOOKUPS_PER_CRON;
647 while (num--) {
648 dictEntry *de;
649 long long t;
650
651 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
652 t = dictGetSignedIntegerVal(de);
653 if (now > t) {
654 sds key = dictGetKey(de);
655 robj *keyobj = createStringObject(key,sdslen(key));
656
657 propagateExpire(db,keyobj);
658 dbDelete(db,keyobj);
659 decrRefCount(keyobj);
660 expired++;
661 server.stat_expiredkeys++;
662 }
663 }
664 /* We can't block forever here even if there are many keys to
665 * expire. So after a given amount of milliseconds return to the
666 * caller waiting for the other active expire cycle. */
667 iteration++;
668 if ((iteration & 0xff) == 0 && /* Check once every 255 iterations */
669 (mstime()-start) > timelimit) return;
670 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
671 }
672 }
673
674 void updateLRUClock(void) {
675 server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) &
676 REDIS_LRU_CLOCK_MAX;
677 }
678
679
680 /* Add a sample to the operations per second array of samples. */
681 void trackOperationsPerSecond(void) {
682 long long t = mstime() - server.ops_sec_last_sample_time;
683 long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
684 long long ops_sec;
685
686 ops_sec = t > 0 ? (ops*1000/t) : 0;
687
688 server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
689 server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
690 server.ops_sec_last_sample_time = mstime();
691 server.ops_sec_last_sample_ops = server.stat_numcommands;
692 }
693
694 /* Return the mean of all the samples. */
695 long long getOperationsPerSecond(void) {
696 int j;
697 long long sum = 0;
698
699 for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
700 sum += server.ops_sec_samples[j];
701 return sum / REDIS_OPS_SEC_SAMPLES;
702 }
703
704 /* Check for timeouts. Returns non-zero if the client was terminated */
705 int clientsCronHandleTimeout(redisClient *c) {
706 time_t now = server.unixtime;
707
708 if (server.maxidletime &&
709 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
710 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
711 !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
712 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
713 listLength(c->pubsub_patterns) == 0 &&
714 (now - c->lastinteraction > server.maxidletime))
715 {
716 redisLog(REDIS_VERBOSE,"Closing idle client");
717 freeClient(c);
718 return 1;
719 } else if (c->flags & REDIS_BLOCKED) {
720 if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
721 addReply(c,shared.nullmultibulk);
722 unblockClientWaitingData(c);
723 }
724 }
725 return 0;
726 }
727
728 /* The client query buffer is an sds.c string that can end with a lot of
729 * free space not used, this function reclaims space if needed.
730 *
731 * The funciton always returns 0 as it never terminates the client. */
732 int clientsCronResizeQueryBuffer(redisClient *c) {
733 size_t querybuf_size = sdsAllocSize(c->querybuf);
734 time_t idletime = server.unixtime - c->lastinteraction;
735
736 /* There are two conditions to resize the query buffer:
737 * 1) Query buffer is > BIG_ARG and too big for latest peak.
738 * 2) Client is inactive and the buffer is bigger than 1k. */
739 if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
740 (querybuf_size/(c->querybuf_peak+1)) > 2) ||
741 (querybuf_size > 1024 && idletime > 2))
742 {
743 /* Only resize the query buffer if it is actually wasting space. */
744 if (sdsavail(c->querybuf) > 1024) {
745 c->querybuf = sdsRemoveFreeSpace(c->querybuf);
746 }
747 }
748 /* Reset the peak again to capture the peak memory usage in the next
749 * cycle. */
750 c->querybuf_peak = 0;
751 return 0;
752 }
753
754 void clientsCron(void) {
755 /* Make sure to process at least 1/(REDIS_HZ*10) of clients per call.
756 * Since this function is called REDIS_HZ times per second we are sure that
757 * in the worst case we process all the clients in 10 seconds.
758 * In normal conditions (a reasonable number of clients) we process
759 * all the clients in a shorter time. */
760 int numclients = listLength(server.clients);
761 int iterations = numclients/(REDIS_HZ*10);
762
763 if (iterations < 50)
764 iterations = (numclients < 50) ? numclients : 50;
765 while(listLength(server.clients) && iterations--) {
766 redisClient *c;
767 listNode *head;
768
769 /* Rotate the list, take the current head, process.
770 * This way if the client must be removed from the list it's the
771 * first element and we don't incur into O(N) computation. */
772 listRotate(server.clients);
773 head = listFirst(server.clients);
774 c = listNodeValue(head);
775 /* The following functions do different service checks on the client.
776 * The protocol is that they return non-zero if the client was
777 * terminated. */
778 if (clientsCronHandleTimeout(c)) continue;
779 if (clientsCronResizeQueryBuffer(c)) continue;
780 }
781 }
782
/* This is our timer interrupt, called REDIS_HZ times per second.
 * Here is where we do a number of things that need to be done asynchronously.
 * For instance:
 *
 * - Active expired keys collection (it is also performed in a lazy way on
 *   lookup).
 * - Software watchdog.
 * - Update some statistics.
 * - Incremental rehashing of the DBs hash tables.
 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
 * - Clients timeout of different kinds.
 * - Replication reconnection.
 * - Many more...
 *
 * Everything directly called here will be called REDIS_HZ times per second,
 * so in order to throttle execution of things we want to do less frequently
 * a macro is used: run_with_period(milliseconds) { .... }
 */
801
/* Using the following macro you can run code inside serverCron() with the
 * specified period, specified in milliseconds.
 * The actual resolution depends on REDIS_HZ.
 *
 * The macro expands to an 'if' header and reads a variable named 'loops'
 * that must be in scope at the point of use. The guarded statement runs
 * when 'loops' is a multiple of the number of timer ticks contained in
 * '_ms_' milliseconds.
 *
 * A period not longer than one tick (1000/REDIS_HZ milliseconds) runs the
 * statement at every call: without this check the tick count would be zero
 * and 'loops % 0' is undefined behaviour. */
#define run_with_period(_ms_) \
    if (((_ms_) <= 1000/REDIS_HZ) || !(loops % ((_ms_)/(1000/REDIS_HZ))))
806
807 int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
808 int j, loops = server.cronloops;
809 REDIS_NOTUSED(eventLoop);
810 REDIS_NOTUSED(id);
811 REDIS_NOTUSED(clientData);
812
813 /* Software watchdog: deliver the SIGALRM that will reach the signal
814 * handler if we don't return here fast enough. */
815 if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
816
817 /* We take a cached value of the unix time in the global state because
818 * with virtual memory and aging there is to store the current time
819 * in objects at every object access, and accuracy is not needed.
820 * To access a global var is faster than calling time(NULL) */
821 server.unixtime = time(NULL);
822
823 run_with_period(100) trackOperationsPerSecond();
824
825 /* We have just 22 bits per object for LRU information.
826 * So we use an (eventually wrapping) LRU clock with 10 seconds resolution.
827 * 2^22 bits with 10 seconds resoluton is more or less 1.5 years.
828 *
829 * Note that even if this will wrap after 1.5 years it's not a problem,
830 * everything will still work but just some object will appear younger
831 * to Redis. But for this to happen a given object should never be touched
832 * for 1.5 years.
833 *
834 * Note that you can change the resolution altering the
835 * REDIS_LRU_CLOCK_RESOLUTION define.
836 */
837 updateLRUClock();
838
839 /* Record the max memory used since the server was started. */
840 if (zmalloc_used_memory() > server.stat_peak_memory)
841 server.stat_peak_memory = zmalloc_used_memory();
842
843 /* We received a SIGTERM, shutting down here in a safe way, as it is
844 * not ok doing so inside the signal handler. */
845 if (server.shutdown_asap) {
846 if (prepareForShutdown(0) == REDIS_OK) exit(0);
847 redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
848 }
849
850 /* Show some info about non-empty databases */
851 run_with_period(5000) {
852 for (j = 0; j < server.dbnum; j++) {
853 long long size, used, vkeys;
854
855 size = dictSlots(server.db[j].dict);
856 used = dictSize(server.db[j].dict);
857 vkeys = dictSize(server.db[j].expires);
858 if (used || vkeys) {
859 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
860 /* dictPrintStats(server.dict); */
861 }
862 }
863 }
864
865 /* We don't want to resize the hash tables while a bacground saving
866 * is in progress: the saving child is created using fork() that is
867 * implemented with a copy-on-write semantic in most modern systems, so
868 * if we resize the HT while there is the saving child at work actually
869 * a lot of memory movements in the parent will cause a lot of pages
870 * copied. */
871 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
872 run_with_period(1000) tryResizeHashTables();
873 if (server.activerehashing) incrementallyRehash();
874 }
875
876 /* Show information about connected clients */
877 run_with_period(5000) {
878 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
879 listLength(server.clients)-listLength(server.slaves),
880 listLength(server.slaves),
881 zmalloc_used_memory());
882 }
883
884 /* We need to do a few operations on clients asynchronously. */
885 clientsCron();
886
887 /* Start a scheduled AOF rewrite if this was requested by the user while
888 * a BGSAVE was in progress. */
889 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
890 server.aof_rewrite_scheduled)
891 {
892 rewriteAppendOnlyFileBackground();
893 }
894
895 /* Check if a background saving or AOF rewrite in progress terminated. */
896 if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
897 int statloc;
898 pid_t pid;
899
900 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
901 int exitcode = WEXITSTATUS(statloc);
902 int bysignal = 0;
903
904 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
905
906 if (pid == server.rdb_child_pid) {
907 backgroundSaveDoneHandler(exitcode,bysignal);
908 } else {
909 backgroundRewriteDoneHandler(exitcode,bysignal);
910 }
911 updateDictResizePolicy();
912 }
913 } else {
914 /* If there is not a background saving/rewrite in progress check if
915 * we have to save/rewrite now */
916 for (j = 0; j < server.saveparamslen; j++) {
917 struct saveparam *sp = server.saveparams+j;
918
919 if (server.dirty >= sp->changes &&
920 server.unixtime-server.lastsave > sp->seconds) {
921 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
922 sp->changes, sp->seconds);
923 rdbSaveBackground(server.rdb_filename);
924 break;
925 }
926 }
927
928 /* Trigger an AOF rewrite if needed */
929 if (server.rdb_child_pid == -1 &&
930 server.aof_child_pid == -1 &&
931 server.aof_rewrite_perc &&
932 server.aof_current_size > server.aof_rewrite_min_size)
933 {
934 long long base = server.aof_rewrite_base_size ?
935 server.aof_rewrite_base_size : 1;
936 long long growth = (server.aof_current_size*100/base) - 100;
937 if (growth >= server.aof_rewrite_perc) {
938 redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
939 rewriteAppendOnlyFileBackground();
940 }
941 }
942 }
943
944
945 /* If we postponed an AOF buffer flush, let's try to do it every time the
946 * cron function is called. */
947 if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
948
949 /* Expire a few keys per cycle, only if this is a master.
950 * On slaves we wait for DEL operations synthesized by the master
951 * in order to guarantee a strict consistency. */
952 if (server.masterhost == NULL) activeExpireCycle();
953
954 /* Close clients that need to be closed asynchronous */
955 freeClientsInAsyncFreeQueue();
956
957 /* Replication cron function -- used to reconnect to master and
958 * to detect transfer failures. */
959 run_with_period(1000) replicationCron();
960
961 /* Run other sub-systems specific cron jobs */
962 run_with_period(1000) {
963 if (server.cluster_enabled) clusterCron();
964 }
965
966 server.cronloops++;
967 return 1000/REDIS_HZ;
968 }
969
970 /* This function gets called every time Redis is entering the
971 * main loop of the event driven library, that is, before to sleep
972 * for ready file descriptors. */
973 void beforeSleep(struct aeEventLoop *eventLoop) {
974 REDIS_NOTUSED(eventLoop);
975 listNode *ln;
976 redisClient *c;
977
978 /* Try to process pending commands for clients that were just unblocked. */
979 while (listLength(server.unblocked_clients)) {
980 ln = listFirst(server.unblocked_clients);
981 redisAssert(ln != NULL);
982 c = ln->value;
983 listDelNode(server.unblocked_clients,ln);
984 c->flags &= ~REDIS_UNBLOCKED;
985
986 /* Process remaining data in the input buffer. */
987 if (c->querybuf && sdslen(c->querybuf) > 0) {
988 server.current_client = c;
989 processInputBuffer(c);
990 server.current_client = NULL;
991 }
992 }
993
994 /* Write the AOF buffer on disk */
995 flushAppendOnlyFile(0);
996 }
997
998 /* =========================== Server initialization ======================== */
999
1000 void createSharedObjects(void) {
1001 int j;
1002
1003 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1004 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1005 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
1006 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1007 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1008 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1009 shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
1010 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1011 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1012 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
1013 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1014 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
1015 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1016 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1017 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1018 "-ERR no such key\r\n"));
1019 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1020 "-ERR syntax error\r\n"));
1021 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1022 "-ERR source and destination objects are the same\r\n"));
1023 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1024 "-ERR index out of range\r\n"));
1025 shared.noscripterr = createObject(REDIS_STRING,sdsnew(
1026 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
1027 shared.loadingerr = createObject(REDIS_STRING,sdsnew(
1028 "-LOADING Redis is loading the dataset in memory\r\n"));
1029 shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
1030 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
1031 shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
1032 "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
1033 shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
1034 "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
1035 shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
1036 "-READONLY You can't write against a read only slave.\r\n"));
1037 shared.oomerr = createObject(REDIS_STRING,sdsnew(
1038 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
1039 shared.space = createObject(REDIS_STRING,sdsnew(" "));
1040 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1041 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1042
1043 for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
1044 shared.select[j] = createObject(REDIS_STRING,
1045 sdscatprintf(sdsempty(),"select %d\r\n", j));
1046 }
1047 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1048 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1049 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1050 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1051 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1052 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1053 shared.del = createStringObject("DEL",3);
1054 shared.rpop = createStringObject("RPOP",4);
1055 shared.lpop = createStringObject("LPOP",4);
1056 for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
1057 shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
1058 shared.integers[j]->encoding = REDIS_ENCODING_INT;
1059 }
1060 for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
1061 shared.mbulkhdr[j] = createObject(REDIS_STRING,
1062 sdscatprintf(sdsempty(),"*%d\r\n",j));
1063 shared.bulkhdr[j] = createObject(REDIS_STRING,
1064 sdscatprintf(sdsempty(),"$%d\r\n",j));
1065 }
1066 }
1067
1068 void initServerConfig() {
1069 getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
1070 server.runid[REDIS_RUN_ID_SIZE] = '\0';
1071 server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
1072 server.port = REDIS_SERVERPORT;
1073 server.bindaddr = NULL;
1074 server.unixsocket = NULL;
1075 server.unixsocketperm = 0;
1076 server.ipfd = -1;
1077 server.sofd = -1;
1078 server.dbnum = REDIS_DEFAULT_DBNUM;
1079 server.verbosity = REDIS_NOTICE;
1080 server.maxidletime = REDIS_MAXIDLETIME;
1081 server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
1082 server.saveparams = NULL;
1083 server.loading = 0;
1084 server.logfile = NULL; /* NULL = log on standard output */
1085 server.syslog_enabled = 0;
1086 server.syslog_ident = zstrdup("redis");
1087 server.syslog_facility = LOG_LOCAL0;
1088 server.daemonize = 0;
1089 server.aof_state = REDIS_AOF_OFF;
1090 server.aof_fsync = AOF_FSYNC_EVERYSEC;
1091 server.aof_no_fsync_on_rewrite = 0;
1092 server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
1093 server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
1094 server.aof_rewrite_base_size = 0;
1095 server.aof_rewrite_scheduled = 0;
1096 server.aof_last_fsync = time(NULL);
1097 server.aof_delayed_fsync = 0;
1098 server.aof_fd = -1;
1099 server.aof_selected_db = -1; /* Make sure the first time will not match */
1100 server.aof_flush_postponed_start = 0;
1101 server.pidfile = zstrdup("/var/run/redis.pid");
1102 server.rdb_filename = zstrdup("dump.rdb");
1103 server.aof_filename = zstrdup("appendonly.aof");
1104 server.requirepass = NULL;
1105 server.rdb_compression = 1;
1106 server.rdb_checksum = 1;
1107 server.activerehashing = 1;
1108 server.maxclients = REDIS_MAX_CLIENTS;
1109 server.bpop_blocked_clients = 0;
1110 server.maxmemory = 0;
1111 server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
1112 server.maxmemory_samples = 3;
1113 server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
1114 server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
1115 server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
1116 server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
1117 server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
1118 server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
1119 server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
1120 server.shutdown_asap = 0;
1121 server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
1122 server.repl_timeout = REDIS_REPL_TIMEOUT;
1123 server.cluster_enabled = 0;
1124 server.cluster.configfile = zstrdup("nodes.conf");
1125 server.lua_caller = NULL;
1126 server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
1127 server.lua_client = NULL;
1128 server.lua_timedout = 0;
1129
1130 updateLRUClock();
1131 resetServerSaveParams();
1132
1133 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1134 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1135 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1136 /* Replication related */
1137 server.masterauth = NULL;
1138 server.masterhost = NULL;
1139 server.masterport = 6379;
1140 server.master = NULL;
1141 server.repl_state = REDIS_REPL_NONE;
1142 server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
1143 server.repl_serve_stale_data = 1;
1144 server.repl_slave_ro = 1;
1145 server.repl_down_since = time(NULL);
1146
1147 /* Client output buffer limits */
1148 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
1149 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_bytes = 0;
1150 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_seconds = 0;
1151 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].hard_limit_bytes = 1024*1024*256;
1152 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_bytes = 1024*1024*64;
1153 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_seconds = 60;
1154 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].hard_limit_bytes = 1024*1024*32;
1155 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_bytes = 1024*1024*8;
1156 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_seconds = 60;
1157
1158 /* Double constants initialization */
1159 R_Zero = 0.0;
1160 R_PosInf = 1.0/R_Zero;
1161 R_NegInf = -1.0/R_Zero;
1162 R_Nan = R_Zero/R_Zero;
1163
1164 /* Command table -- we intiialize it here as it is part of the
1165 * initial configuration, since command names may be changed via
1166 * redis.conf using the rename-command directive. */
1167 server.commands = dictCreate(&commandTableDictType,NULL);
1168 populateCommandTable();
1169 server.delCommand = lookupCommandByCString("del");
1170 server.multiCommand = lookupCommandByCString("multi");
1171 server.lpushCommand = lookupCommandByCString("lpush");
1172
1173 /* Slow log */
1174 server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
1175 server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
1176
1177 /* Debugging */
1178 server.assert_failed = "<no assertion failed>";
1179 server.assert_file = "<no file>";
1180 server.assert_line = 0;
1181 server.bug_report_start = 0;
1182 server.watchdog_period = 0;
1183 }
1184
1185 /* This function will try to raise the max number of open files accordingly to
1186 * the configured max number of clients. It will also account for 32 additional
1187 * file descriptors as we need a few more for persistence, listening
1188 * sockets, log files and so forth.
1189 *
1190 * If it will not be possible to set the limit accordingly to the configured
1191 * max number of clients, the function will do the reverse setting
1192 * server.maxclients to the value that we can actually handle. */
1193 void adjustOpenFilesLimit(void) {
1194 rlim_t maxfiles = server.maxclients+32;
1195 struct rlimit limit;
1196
1197 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
1198 redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
1199 strerror(errno));
1200 server.maxclients = 1024-32;
1201 } else {
1202 rlim_t oldlimit = limit.rlim_cur;
1203
1204 /* Set the max number of files if the current limit is not enough
1205 * for our needs. */
1206 if (oldlimit < maxfiles) {
1207 rlim_t f;
1208
1209 f = maxfiles;
1210 while(f > oldlimit) {
1211 limit.rlim_cur = f;
1212 limit.rlim_max = f;
1213 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
1214 f -= 128;
1215 }
1216 if (f < oldlimit) f = oldlimit;
1217 if (f != maxfiles) {
1218 server.maxclients = f-32;
1219 redisLog(REDIS_WARNING,"Unable to set the max number of files limit to %d (%s), setting the max clients configuration to %d.",
1220 (int) maxfiles, strerror(errno), (int) server.maxclients);
1221 } else {
1222 redisLog(REDIS_NOTICE,"Max number of open files set to %d",
1223 (int) maxfiles);
1224 }
1225 }
1226 }
1227 }
1228
1229 void initServer() {
1230 int j;
1231
1232 signal(SIGHUP, SIG_IGN);
1233 signal(SIGPIPE, SIG_IGN);
1234 setupSignalHandlers();
1235
1236 if (server.syslog_enabled) {
1237 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
1238 server.syslog_facility);
1239 }
1240
1241 server.current_client = NULL;
1242 server.clients = listCreate();
1243 server.clients_to_close = listCreate();
1244 server.slaves = listCreate();
1245 server.monitors = listCreate();
1246 server.unblocked_clients = listCreate();
1247
1248 createSharedObjects();
1249 adjustOpenFilesLimit();
1250 server.el = aeCreateEventLoop(server.maxclients+1024);
1251 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1252
1253 if (server.port != 0) {
1254 server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr);
1255 if (server.ipfd == ANET_ERR) {
1256 redisLog(REDIS_WARNING, "Opening port %d: %s",
1257 server.port, server.neterr);
1258 exit(1);
1259 }
1260 }
1261 if (server.unixsocket != NULL) {
1262 unlink(server.unixsocket); /* don't care if this fails */
1263 server.sofd = anetUnixServer(server.neterr,server.unixsocket,server.unixsocketperm);
1264 if (server.sofd == ANET_ERR) {
1265 redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
1266 exit(1);
1267 }
1268 }
1269 if (server.ipfd < 0 && server.sofd < 0) {
1270 redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
1271 exit(1);
1272 }
1273 for (j = 0; j < server.dbnum; j++) {
1274 server.db[j].dict = dictCreate(&dbDictType,NULL);
1275 server.db[j].expires = dictCreate(&keyptrDictType,NULL);
1276 server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1277 server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
1278 server.db[j].id = j;
1279 }
1280 server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1281 server.pubsub_patterns = listCreate();
1282 listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1283 listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
1284 server.cronloops = 0;
1285 server.rdb_child_pid = -1;
1286 server.aof_child_pid = -1;
1287 server.aof_rewrite_buf = sdsempty();
1288 server.aof_buf = sdsempty();
1289 server.lastsave = time(NULL);
1290 server.dirty = 0;
1291 server.stat_numcommands = 0;
1292 server.stat_numconnections = 0;
1293 server.stat_expiredkeys = 0;
1294 server.stat_evictedkeys = 0;
1295 server.stat_starttime = time(NULL);
1296 server.stat_keyspace_misses = 0;
1297 server.stat_keyspace_hits = 0;
1298 server.stat_peak_memory = 0;
1299 server.stat_fork_time = 0;
1300 server.stat_rejected_conn = 0;
1301 memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
1302 server.ops_sec_idx = 0;
1303 server.ops_sec_last_sample_time = mstime();
1304 server.ops_sec_last_sample_ops = 0;
1305 server.unixtime = time(NULL);
1306 server.lastbgsave_status = REDIS_OK;
1307 server.stop_writes_on_bgsave_err = 1;
1308 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1309 if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE,
1310 acceptTcpHandler,NULL) == AE_ERR) oom("creating file event");
1311 if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
1312 acceptUnixHandler,NULL) == AE_ERR) oom("creating file event");
1313
1314 if (server.aof_state == REDIS_AOF_ON) {
1315 server.aof_fd = open(server.aof_filename,
1316 O_WRONLY|O_APPEND|O_CREAT,0644);
1317 if (server.aof_fd == -1) {
1318 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1319 strerror(errno));
1320 exit(1);
1321 }
1322 }
1323
1324 /* 32 bit instances are limited to 4GB of address space, so if there is
1325 * no explicit limit in the user provided configuration we set a limit
1326 * at 3.5GB using maxmemory with 'noeviction' policy'. This saves
1327 * useless crashes of the Redis instance. */
1328 if (server.arch_bits == 32 && server.maxmemory == 0) {
1329 redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3.5 GB maxmemory limit with 'noeviction' policy now.");
1330 server.maxmemory = 3584LL*(1024*1024); /* 3584 MB = 3.5 GB */
1331 server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
1332 }
1333
1334 if (server.cluster_enabled) clusterInit();
1335 scriptingInit();
1336 slowlogInit();
1337 bioInit();
1338 }
1339
1340 /* Populates the Redis Command Table starting from the hard coded list
1341 * we have on top of redis.c file. */
1342 void populateCommandTable(void) {
1343 int j;
1344 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1345
1346 for (j = 0; j < numcommands; j++) {
1347 struct redisCommand *c = redisCommandTable+j;
1348 char *f = c->sflags;
1349 int retval;
1350
1351 while(*f != '\0') {
1352 switch(*f) {
1353 case 'w': c->flags |= REDIS_CMD_WRITE; break;
1354 case 'r': c->flags |= REDIS_CMD_READONLY; break;
1355 case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
1356 case 'a': c->flags |= REDIS_CMD_ADMIN; break;
1357 case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
1358 case 'f': c->flags |= REDIS_CMD_FORCE_REPLICATION; break;
1359 case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
1360 case 'R': c->flags |= REDIS_CMD_RANDOM; break;
1361 case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
1362 default: redisPanic("Unsupported command flag"); break;
1363 }
1364 f++;
1365 }
1366
1367 retval = dictAdd(server.commands, sdsnew(c->name), c);
1368 assert(retval == DICT_OK);
1369 }
1370 }
1371
1372 void resetCommandTableStats(void) {
1373 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1374 int j;
1375
1376 for (j = 0; j < numcommands; j++) {
1377 struct redisCommand *c = redisCommandTable+j;
1378
1379 c->microseconds = 0;
1380 c->calls = 0;
1381 }
1382 }
1383
1384 /* ========================== Redis OP Array API ============================ */
1385
1386 void redisOpArrayInit(redisOpArray *oa) {
1387 oa->ops = NULL;
1388 oa->numops = 0;
1389 }
1390
1391 int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid,
1392 robj **argv, int argc, int target)
1393 {
1394 redisOp *op;
1395
1396 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1));
1397 op = oa->ops+oa->numops;
1398 op->cmd = cmd;
1399 op->dbid = dbid;
1400 op->argv = argv;
1401 op->argc = argc;
1402 op->target = target;
1403 oa->numops++;
1404 return oa->numops;
1405 }
1406
1407 void redisOpArrayFree(redisOpArray *oa) {
1408 while(oa->numops) {
1409 int j;
1410 redisOp *op;
1411
1412 oa->numops--;
1413 op = oa->ops+oa->numops;
1414 for (j = 0; j < op->argc; j++)
1415 decrRefCount(op->argv[j]);
1416 zfree(op->argv);
1417 }
1418 zfree(oa->ops);
1419 }
1420
1421 /* ====================== Commands lookup and execution ===================== */
1422
1423 struct redisCommand *lookupCommand(sds name) {
1424 return dictFetchValue(server.commands, name);
1425 }
1426
1427 struct redisCommand *lookupCommandByCString(char *s) {
1428 struct redisCommand *cmd;
1429 sds name = sdsnew(s);
1430
1431 cmd = dictFetchValue(server.commands, name);
1432 sdsfree(name);
1433 return cmd;
1434 }
1435
1436 /* Propagate the specified command (in the context of the specified database id)
1437 * to AOF, Slaves and Monitors.
1438 *
1439 * flags are an xor between:
1440 * + REDIS_PROPAGATE_NONE (no propagation of command at all)
1441 * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
1442 * + REDIS_PROPAGATE_REPL (propagate into the replication link)
1443 */
1444 void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1445 int flags)
1446 {
1447 if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
1448 feedAppendOnlyFile(cmd,dbid,argv,argc);
1449 if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves))
1450 replicationFeedSlaves(server.slaves,dbid,argv,argc);
1451 }
1452
1453 /* Used inside commands to schedule the propagation of additional commands
1454 * after the current command is propagated to AOF / Replication. */
1455 void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1456 int target)
1457 {
1458 redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
1459 }
1460
1461 /* Call() is the core of Redis execution of a command */
1462 void call(redisClient *c, int flags) {
1463 long long dirty, start = ustime(), duration;
1464
1465 /* Sent the command to clients in MONITOR mode, only if the commands are
1466 * not geneated from reading an AOF. */
1467 if (listLength(server.monitors) && !server.loading)
1468 replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
1469
1470 /* Call the command. */
1471 redisOpArrayInit(&server.also_propagate);
1472 dirty = server.dirty;
1473 c->cmd->proc(c);
1474 dirty = server.dirty-dirty;
1475 duration = ustime()-start;
1476
1477 /* When EVAL is called loading the AOF we don't want commands called
1478 * from Lua to go into the slowlog or to populate statistics. */
1479 if (server.loading && c->flags & REDIS_LUA_CLIENT)
1480 flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
1481
1482 /* Log the command into the Slow log if needed, and populate the
1483 * per-command statistics that we show in INFO commandstats. */
1484 if (flags & REDIS_CALL_SLOWLOG)
1485 slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
1486 if (flags & REDIS_CALL_STATS) {
1487 c->cmd->microseconds += duration;
1488 c->cmd->calls++;
1489 }
1490
1491 /* Propagate the command into the AOF and replication link */
1492 if (flags & REDIS_CALL_PROPAGATE) {
1493 int flags = REDIS_PROPAGATE_NONE;
1494
1495 if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION)
1496 flags |= REDIS_PROPAGATE_REPL;
1497 if (dirty)
1498 flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
1499 if (flags != REDIS_PROPAGATE_NONE)
1500 propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
1501 }
1502 /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional
1503 * PUSH command. */
1504 if (server.also_propagate.numops) {
1505 int j;
1506 redisOp *rop;
1507
1508 for (j = 0; j < server.also_propagate.numops; j++) {
1509 rop = &server.also_propagate.ops[j];
1510 propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
1511 }
1512 redisOpArrayFree(&server.also_propagate);
1513 }
1514 server.stat_numcommands++;
1515 }
1516
1517 /* If this function gets called we already read a whole
1518 * command, argments are in the client argv/argc fields.
1519 * processCommand() execute the command or prepare the
1520 * server for a bulk read from the client.
1521 *
1522 * If 1 is returned the client is still alive and valid and
1523 * and other operations can be performed by the caller. Otherwise
1524 * if 0 is returned the client was destroied (i.e. after QUIT). */
1525 int processCommand(redisClient *c) {
1526 /* The QUIT command is handled separately. Normal command procs will
1527 * go through checking for replication and QUIT will cause trouble
1528 * when FORCE_REPLICATION is enabled and would be implemented in
1529 * a regular command proc. */
1530 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1531 addReply(c,shared.ok);
1532 c->flags |= REDIS_CLOSE_AFTER_REPLY;
1533 return REDIS_ERR;
1534 }
1535
1536 /* Now lookup the command and check ASAP about trivial error conditions
1537 * such as wrong arity, bad command name and so forth. */
1538 c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
1539 if (!c->cmd) {
1540 addReplyErrorFormat(c,"unknown command '%s'",
1541 (char*)c->argv[0]->ptr);
1542 return REDIS_OK;
1543 } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
1544 (c->argc < -c->cmd->arity)) {
1545 addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
1546 c->cmd->name);
1547 return REDIS_OK;
1548 }
1549
1550 /* Check if the user is authenticated */
1551 if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
1552 {
1553 addReplyError(c,"operation not permitted");
1554 return REDIS_OK;
1555 }
1556
1557 /* If cluster is enabled, redirect here */
1558 if (server.cluster_enabled &&
1559 !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0)) {
1560 int hashslot;
1561
1562 if (server.cluster.state != REDIS_CLUSTER_OK) {
1563 addReplyError(c,"The cluster is down. Check with CLUSTER INFO for more information");
1564 return REDIS_OK;
1565 } else {
1566 int ask;
1567 clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&ask);
1568 if (n == NULL) {
1569 addReplyError(c,"Multi keys request invalid in cluster");
1570 return REDIS_OK;
1571 } else if (n != server.cluster.myself) {
1572 addReplySds(c,sdscatprintf(sdsempty(),
1573 "-%s %d %s:%d\r\n", ask ? "ASK" : "MOVED",
1574 hashslot,n->ip,n->port));
1575 return REDIS_OK;
1576 }
1577 }
1578 }
1579
1580 /* Handle the maxmemory directive.
1581 *
1582 * First we try to free some memory if possible (if there are volatile
1583 * keys in the dataset). If there are not the only thing we can do
1584 * is returning an error. */
1585 if (server.maxmemory) {
1586 int retval = freeMemoryIfNeeded();
1587 if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
1588 addReply(c, shared.oomerr);
1589 return REDIS_OK;
1590 }
1591 }
1592
1593 /* Don't accept write commands if there are problems persisting on disk. */
1594 if (server.stop_writes_on_bgsave_err &&
1595 server.saveparamslen > 0
1596 && server.lastbgsave_status == REDIS_ERR &&
1597 c->cmd->flags & REDIS_CMD_WRITE)
1598 {
1599 addReply(c, shared.bgsaveerr);
1600 return REDIS_OK;
1601 }
1602
1603 /* Don't accept wirte commands if this is a read only slave. But
1604 * accept write commands if this is our master. */
1605 if (server.masterhost && server.repl_slave_ro &&
1606 !(c->flags & REDIS_MASTER) &&
1607 c->cmd->flags & REDIS_CMD_WRITE)
1608 {
1609 addReply(c, shared.roslaveerr);
1610 return REDIS_OK;
1611 }
1612
1613 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
1614 if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
1615 &&
1616 c->cmd->proc != subscribeCommand &&
1617 c->cmd->proc != unsubscribeCommand &&
1618 c->cmd->proc != psubscribeCommand &&
1619 c->cmd->proc != punsubscribeCommand) {
1620 addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
1621 return REDIS_OK;
1622 }
1623
1624 /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
1625 * we are a slave with a broken link with master. */
1626 if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
1627 server.repl_serve_stale_data == 0 &&
1628 c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
1629 {
1630 addReply(c, shared.masterdownerr);
1631 return REDIS_OK;
1632 }
1633
1634 /* Loading DB? Return an error if the command is not INFO */
1635 if (server.loading && c->cmd->proc != infoCommand) {
1636 addReply(c, shared.loadingerr);
1637 return REDIS_OK;
1638 }
1639
1640 /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. */
1641 if (server.lua_timedout &&
1642 !(c->cmd->proc == shutdownCommand &&
1643 c->argc == 2 &&
1644 tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
1645 !(c->cmd->proc == scriptCommand &&
1646 c->argc == 2 &&
1647 tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
1648 {
1649 addReply(c, shared.slowscripterr);
1650 return REDIS_OK;
1651 }
1652
1653 /* Exec the command */
1654 if (c->flags & REDIS_MULTI &&
1655 c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
1656 c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
1657 {
1658 queueMultiCommand(c);
1659 addReply(c,shared.queued);
1660 } else {
1661 call(c,REDIS_CALL_FULL);
1662 }
1663 return REDIS_OK;
1664 }
1665
1666 /*================================== Shutdown =============================== */
1667
1668 int prepareForShutdown(int flags) {
1669 int save = flags & REDIS_SHUTDOWN_SAVE;
1670 int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
1671
1672 redisLog(REDIS_WARNING,"User requested shutdown...");
1673 /* Kill the saving child if there is a background saving in progress.
1674 We want to avoid race conditions, for instance our saving child may
1675 overwrite the synchronous saving did by SHUTDOWN. */
1676 if (server.rdb_child_pid != -1) {
1677 redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
1678 kill(server.rdb_child_pid,SIGKILL);
1679 rdbRemoveTempFile(server.rdb_child_pid);
1680 }
1681 if (server.aof_state != REDIS_AOF_OFF) {
1682 /* Kill the AOF saving child as the AOF we already have may be longer
1683 * but contains the full dataset anyway. */
1684 if (server.aof_child_pid != -1) {
1685 redisLog(REDIS_WARNING,
1686 "There is a child rewriting the AOF. Killing it!");
1687 kill(server.aof_child_pid,SIGKILL);
1688 }
1689 /* Append only file: fsync() the AOF and exit */
1690 redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
1691 aof_fsync(server.aof_fd);
1692 }
1693 if ((server.saveparamslen > 0 && !nosave) || save) {
1694 redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
1695 /* Snapshotting. Perform a SYNC SAVE and exit */
1696 if (rdbSave(server.rdb_filename) != REDIS_OK) {
1697 /* Ooops.. error saving! The best we can do is to continue
1698 * operating. Note that if there was a background saving process,
1699 * in the next cron() Redis will be notified that the background
1700 * saving aborted, handling special stuff like slaves pending for
1701 * synchronization... */
1702 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
1703 return REDIS_ERR;
1704 }
1705 }
1706 if (server.daemonize) {
1707 redisLog(REDIS_NOTICE,"Removing the pid file.");
1708 unlink(server.pidfile);
1709 }
1710 /* Close the listening sockets. Apparently this allows faster restarts. */
1711 if (server.ipfd != -1) close(server.ipfd);
1712 if (server.sofd != -1) close(server.sofd);
1713 if (server.unixsocket) {
1714 redisLog(REDIS_NOTICE,"Removing the unix socket file.");
1715 unlink(server.unixsocket); /* don't care if this fails */
1716 }
1717
1718 redisLog(REDIS_WARNING,"Redis is now ready to exit, bye bye...");
1719 return REDIS_OK;
1720 }
1721
1722 /*================================== Commands =============================== */
1723
1724 void authCommand(redisClient *c) {
1725 if (!server.requirepass) {
1726 addReplyError(c,"Client sent AUTH, but no password is set");
1727 } else if (!strcmp(c->argv[1]->ptr, server.requirepass)) {
1728 c->authenticated = 1;
1729 addReply(c,shared.ok);
1730 } else {
1731 c->authenticated = 0;
1732 addReplyError(c,"invalid password");
1733 }
1734 }
1735
1736 void pingCommand(redisClient *c) {
1737 addReply(c,shared.pong);
1738 }
1739
1740 void echoCommand(redisClient *c) {
1741 addReplyBulk(c,c->argv[1]);
1742 }
1743
1744 void timeCommand(redisClient *c) {
1745 struct timeval tv;
1746
1747 /* gettimeofday() can only fail if &tv is a bad addresss so we
1748 * don't check for errors. */
1749 gettimeofday(&tv,NULL);
1750 addReplyMultiBulkLen(c,2);
1751 addReplyBulkLongLong(c,tv.tv_sec);
1752 addReplyBulkLongLong(c,tv.tv_usec);
1753 }
1754
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the output buffer and is always written: values below 1024 are
 * printed as an integer followed by 'B', larger values are printed with
 * two decimals followed by K, M, G, T or P (powers of 1024). Values too
 * big even for the P unit fall back to the plain byte count, so the
 * buffer is never left uninitialized. The longest possible output is the
 * byte-count form of the largest unsigned long long (20 digits plus 'B'
 * and the terminator), so the buffer must hold at least 22 bytes. */
void bytesToHuman(char *s, unsigned long long n) {
    const unsigned long long kib = 1024ULL;
    const unsigned long long mib = kib*1024;
    const unsigned long long gib = mib*1024;
    const unsigned long long tib = gib*1024;
    const unsigned long long pib = tib*1024;
    const unsigned long long eib = pib*1024;

    if (n < kib) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < mib) {
        sprintf(s,"%.2fK",(double)n/kib);
    } else if (n < gib) {
        sprintf(s,"%.2fM",(double)n/mib);
    } else if (n < tib) {
        sprintf(s,"%.2fG",(double)n/gib);
    } else if (n < pib) {
        sprintf(s,"%.2fT",(double)n/tib);
    } else if (n < eib) {
        sprintf(s,"%.2fP",(double)n/pib);
    } else {
        /* Larger than any unit we know about: report the raw count. */
        sprintf(s,"%lluB",n);
    }
}
1775
1776 /* Create the string returned by the INFO command. This is decoupled
1777 * by the INFO command itself as we need to report the same information
1778 * on memory corruption problems. */
1779 sds genRedisInfoString(char *section) {
1780 sds info = sdsempty();
1781 time_t uptime = server.unixtime-server.stat_starttime;
1782 int j, numcommands;
1783 struct rusage self_ru, c_ru;
1784 unsigned long lol, bib;
1785 int allsections = 0, defsections = 0;
1786 int sections = 0;
1787
1788 if (section) {
1789 allsections = strcasecmp(section,"all") == 0;
1790 defsections = strcasecmp(section,"default") == 0;
1791 }
1792
1793 getrusage(RUSAGE_SELF, &self_ru);
1794 getrusage(RUSAGE_CHILDREN, &c_ru);
1795 getClientsMaxBuffers(&lol,&bib);
1796
1797 /* Server */
1798 if (allsections || defsections || !strcasecmp(section,"server")) {
1799 struct utsname name;
1800
1801 if (sections++) info = sdscat(info,"\r\n");
1802 uname(&name);
1803 info = sdscatprintf(info,
1804 "# Server\r\n"
1805 "redis_version:%s\r\n"
1806 "redis_git_sha1:%s\r\n"
1807 "redis_git_dirty:%d\r\n"
1808 "os:%s %s %s\r\n"
1809 "arch_bits:%d\r\n"
1810 "multiplexing_api:%s\r\n"
1811 "gcc_version:%d.%d.%d\r\n"
1812 "process_id:%ld\r\n"
1813 "run_id:%s\r\n"
1814 "tcp_port:%d\r\n"
1815 "uptime_in_seconds:%ld\r\n"
1816 "uptime_in_days:%ld\r\n"
1817 "lru_clock:%ld\r\n",
1818 REDIS_VERSION,
1819 redisGitSHA1(),
1820 strtol(redisGitDirty(),NULL,10) > 0,
1821 name.sysname, name.release, name.machine,
1822 server.arch_bits,
1823 aeGetApiName(),
1824 #ifdef __GNUC__
1825 __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
1826 #else
1827 0,0,0,
1828 #endif
1829 (long) getpid(),
1830 server.runid,
1831 server.port,
1832 uptime,
1833 uptime/(3600*24),
1834 (unsigned long) server.lruclock);
1835 }
1836
1837 /* Clients */
1838 if (allsections || defsections || !strcasecmp(section,"clients")) {
1839 if (sections++) info = sdscat(info,"\r\n");
1840 info = sdscatprintf(info,
1841 "# Clients\r\n"
1842 "connected_clients:%lu\r\n"
1843 "client_longest_output_list:%lu\r\n"
1844 "client_biggest_input_buf:%lu\r\n"
1845 "blocked_clients:%d\r\n",
1846 listLength(server.clients)-listLength(server.slaves),
1847 lol, bib,
1848 server.bpop_blocked_clients);
1849 }
1850
1851 /* Memory */
1852 if (allsections || defsections || !strcasecmp(section,"memory")) {
1853 char hmem[64];
1854 char peak_hmem[64];
1855
1856 bytesToHuman(hmem,zmalloc_used_memory());
1857 bytesToHuman(peak_hmem,server.stat_peak_memory);
1858 if (sections++) info = sdscat(info,"\r\n");
1859 info = sdscatprintf(info,
1860 "# Memory\r\n"
1861 "used_memory:%zu\r\n"
1862 "used_memory_human:%s\r\n"
1863 "used_memory_rss:%zu\r\n"
1864 "used_memory_peak:%zu\r\n"
1865 "used_memory_peak_human:%s\r\n"
1866 "used_memory_lua:%lld\r\n"
1867 "mem_fragmentation_ratio:%.2f\r\n"
1868 "mem_allocator:%s\r\n",
1869 zmalloc_used_memory(),
1870 hmem,
1871 zmalloc_get_rss(),
1872 server.stat_peak_memory,
1873 peak_hmem,
1874 ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
1875 zmalloc_get_fragmentation_ratio(),
1876 ZMALLOC_LIB
1877 );
1878 }
1879
1880 /* Persistence */
1881 if (allsections || defsections || !strcasecmp(section,"persistence")) {
1882 if (sections++) info = sdscat(info,"\r\n");
1883 info = sdscatprintf(info,
1884 "# Persistence\r\n"
1885 "loading:%d\r\n"
1886 "aof_enabled:%d\r\n"
1887 "changes_since_last_save:%lld\r\n"
1888 "bgsave_in_progress:%d\r\n"
1889 "last_save_time:%ld\r\n"
1890 "last_bgsave_status:%s\r\n"
1891 "bgrewriteaof_in_progress:%d\r\n"
1892 "bgrewriteaof_scheduled:%d\r\n",
1893 server.loading,
1894 server.aof_state != REDIS_AOF_OFF,
1895 server.dirty,
1896 server.rdb_child_pid != -1,
1897 server.lastsave,
1898 server.lastbgsave_status == REDIS_OK ? "ok" : "err",
1899 server.aof_child_pid != -1,
1900 server.aof_rewrite_scheduled);
1901
1902 if (server.aof_state != REDIS_AOF_OFF) {
1903 info = sdscatprintf(info,
1904 "aof_current_size:%lld\r\n"
1905 "aof_base_size:%lld\r\n"
1906 "aof_pending_rewrite:%d\r\n"
1907 "aof_buffer_length:%zu\r\n"
1908 "aof_pending_bio_fsync:%llu\r\n"
1909 "aof_delayed_fsync:%lu\r\n",
1910 (long long) server.aof_current_size,
1911 (long long) server.aof_rewrite_base_size,
1912 server.aof_rewrite_scheduled,
1913 sdslen(server.aof_buf),
1914 bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
1915 server.aof_delayed_fsync);
1916 }
1917
1918 if (server.loading) {
1919 double perc;
1920 time_t eta, elapsed;
1921 off_t remaining_bytes = server.loading_total_bytes-
1922 server.loading_loaded_bytes;
1923
1924 perc = ((double)server.loading_loaded_bytes /
1925 server.loading_total_bytes) * 100;
1926
1927 elapsed = server.unixtime-server.loading_start_time;
1928 if (elapsed == 0) {
1929 eta = 1; /* A fake 1 second figure if we don't have
1930 enough info */
1931 } else {
1932 eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
1933 }
1934
1935 info = sdscatprintf(info,
1936 "loading_start_time:%ld\r\n"
1937 "loading_total_bytes:%llu\r\n"
1938 "loading_loaded_bytes:%llu\r\n"
1939 "loading_loaded_perc:%.2f\r\n"
1940 "loading_eta_seconds:%ld\r\n"
1941 ,(unsigned long) server.loading_start_time,
1942 (unsigned long long) server.loading_total_bytes,
1943 (unsigned long long) server.loading_loaded_bytes,
1944 perc,
1945 eta
1946 );
1947 }
1948 }
1949
1950 /* Stats */
1951 if (allsections || defsections || !strcasecmp(section,"stats")) {
1952 if (sections++) info = sdscat(info,"\r\n");
1953 info = sdscatprintf(info,
1954 "# Stats\r\n"
1955 "total_connections_received:%lld\r\n"
1956 "total_commands_processed:%lld\r\n"
1957 "instantaneous_ops_per_sec:%lld\r\n"
1958 "rejected_connections:%lld\r\n"
1959 "expired_keys:%lld\r\n"
1960 "evicted_keys:%lld\r\n"
1961 "keyspace_hits:%lld\r\n"
1962 "keyspace_misses:%lld\r\n"
1963 "pubsub_channels:%ld\r\n"
1964 "pubsub_patterns:%lu\r\n"
1965 "latest_fork_usec:%lld\r\n",
1966 server.stat_numconnections,
1967 server.stat_numcommands,
1968 getOperationsPerSecond(),
1969 server.stat_rejected_conn,
1970 server.stat_expiredkeys,
1971 server.stat_evictedkeys,
1972 server.stat_keyspace_hits,
1973 server.stat_keyspace_misses,
1974 dictSize(server.pubsub_channels),
1975 listLength(server.pubsub_patterns),
1976 server.stat_fork_time);
1977 }
1978
1979 /* Replication */
1980 if (allsections || defsections || !strcasecmp(section,"replication")) {
1981 if (sections++) info = sdscat(info,"\r\n");
1982 info = sdscatprintf(info,
1983 "# Replication\r\n"
1984 "role:%s\r\n",
1985 server.masterhost == NULL ? "master" : "slave");
1986 if (server.masterhost) {
1987 info = sdscatprintf(info,
1988 "master_host:%s\r\n"
1989 "master_port:%d\r\n"
1990 "master_link_status:%s\r\n"
1991 "master_last_io_seconds_ago:%d\r\n"
1992 "master_sync_in_progress:%d\r\n"
1993 ,server.masterhost,
1994 server.masterport,
1995 (server.repl_state == REDIS_REPL_CONNECTED) ?
1996 "up" : "down",
1997 server.master ?
1998 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
1999 server.repl_state == REDIS_REPL_TRANSFER
2000 );
2001
2002 if (server.repl_state == REDIS_REPL_TRANSFER) {
2003 info = sdscatprintf(info,
2004 "master_sync_left_bytes:%ld\r\n"
2005 "master_sync_last_io_seconds_ago:%d\r\n"
2006 ,(long)server.repl_transfer_left,
2007 (int)(server.unixtime-server.repl_transfer_lastio)
2008 );
2009 }
2010
2011 if (server.repl_state != REDIS_REPL_CONNECTED) {
2012 info = sdscatprintf(info,
2013 "master_link_down_since_seconds:%ld\r\n",
2014 (long)server.unixtime-server.repl_down_since);
2015 }
2016 }
2017 info = sdscatprintf(info,
2018 "connected_slaves:%lu\r\n",
2019 listLength(server.slaves));
2020 if (listLength(server.slaves)) {
2021 int slaveid = 0;
2022 listNode *ln;
2023 listIter li;
2024
2025 listRewind(server.slaves,&li);
2026 while((ln = listNext(&li))) {
2027 redisClient *slave = listNodeValue(ln);
2028 char *state = NULL;
2029 char ip[32];
2030 int port;
2031
2032 if (anetPeerToString(slave->fd,ip,&port) == -1) continue;
2033 switch(slave->replstate) {
2034 case REDIS_REPL_WAIT_BGSAVE_START:
2035 case REDIS_REPL_WAIT_BGSAVE_END:
2036 state = "wait_bgsave";
2037 break;
2038 case REDIS_REPL_SEND_BULK:
2039 state = "send_bulk";
2040 break;
2041 case REDIS_REPL_ONLINE:
2042 state = "online";
2043 break;
2044 }
2045 if (state == NULL) continue;
2046 info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n",
2047 slaveid,ip,port,state);
2048 slaveid++;
2049 }
2050 }
2051 }
2052
2053 /* CPU */
2054 if (allsections || defsections || !strcasecmp(section,"cpu")) {
2055 if (sections++) info = sdscat(info,"\r\n");
2056 info = sdscatprintf(info,
2057 "# CPU\r\n"
2058 "used_cpu_sys:%.2f\r\n"
2059 "used_cpu_user:%.2f\r\n"
2060 "used_cpu_sys_children:%.2f\r\n"
2061 "used_cpu_user_children:%.2f\r\n",
2062 (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
2063 (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
2064 (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
2065 (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
2066 }
2067
2068 /* cmdtime */
2069 if (allsections || !strcasecmp(section,"commandstats")) {
2070 if (sections++) info = sdscat(info,"\r\n");
2071 info = sdscatprintf(info, "# Commandstats\r\n");
2072 numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
2073 for (j = 0; j < numcommands; j++) {
2074 struct redisCommand *c = redisCommandTable+j;
2075
2076 if (!c->calls) continue;
2077 info = sdscatprintf(info,
2078 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
2079 c->name, c->calls, c->microseconds,
2080 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
2081 }
2082 }
2083
2084 /* Cluster */
2085 if (allsections || defsections || !strcasecmp(section,"cluster")) {
2086 if (sections++) info = sdscat(info,"\r\n");
2087 info = sdscatprintf(info,
2088 "# Cluster\r\n"
2089 "cluster_enabled:%d\r\n",
2090 server.cluster_enabled);
2091 }
2092
2093 /* Key space */
2094 if (allsections || defsections || !strcasecmp(section,"keyspace")) {
2095 if (sections++) info = sdscat(info,"\r\n");
2096 info = sdscatprintf(info, "# Keyspace\r\n");
2097 for (j = 0; j < server.dbnum; j++) {
2098 long long keys, vkeys;
2099
2100 keys = dictSize(server.db[j].dict);
2101 vkeys = dictSize(server.db[j].expires);
2102 if (keys || vkeys) {
2103 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
2104 j, keys, vkeys);
2105 }
2106 }
2107 }
2108 return info;
2109 }
2110
2111 void infoCommand(redisClient *c) {
2112 char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
2113
2114 if (c->argc > 2) {
2115 addReply(c,shared.syntaxerr);
2116 return;
2117 }
2118 sds info = genRedisInfoString(section);
2119 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
2120 (unsigned long)sdslen(info)));
2121 addReplySds(c,info);
2122 addReply(c,shared.crlf);
2123 }
2124
2125 void monitorCommand(redisClient *c) {
2126 /* ignore MONITOR if aleady slave or in monitor mode */
2127 if (c->flags & REDIS_SLAVE) return;
2128
2129 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
2130 c->slaveseldb = 0;
2131 listAddNodeTail(server.monitors,c);
2132 addReply(c,shared.ok);
2133 }
2134
2135 /* ============================ Maxmemory directive ======================== */
2136
2137 /* This function gets called when 'maxmemory' is set on the config file to limit
2138 * the max memory used by the server, before processing a command.
2139 *
2140 * The goal of the function is to free enough memory to keep Redis under the
2141 * configured memory limit.
2142 *
2143 * The function starts calculating how many bytes should be freed to keep
2144 * Redis under the limit, and enters a loop selecting the best keys to
2145 * evict accordingly to the configured policy.
2146 *
2147 * If all the bytes needed to return back under the limit were freed the
2148 * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
2149 * should block the execution of commands that will result in more memory
2150 * used by the server.
2151 */
2152 int freeMemoryIfNeeded(void) {
2153 size_t mem_used, mem_tofree, mem_freed;
2154 int slaves = listLength(server.slaves);
2155
2156 /* Remove the size of slaves output buffers and AOF buffer from the
2157 * count of used memory. */
2158 mem_used = zmalloc_used_memory();
2159 if (slaves) {
2160 listIter li;
2161 listNode *ln;
2162
2163 listRewind(server.slaves,&li);
2164 while((ln = listNext(&li))) {
2165 redisClient *slave = listNodeValue(ln);
2166 unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
2167 if (obuf_bytes > mem_used)
2168 mem_used = 0;
2169 else
2170 mem_used -= obuf_bytes;
2171 }
2172 }
2173 if (server.aof_state != REDIS_AOF_OFF) {
2174 mem_used -= sdslen(server.aof_buf);
2175 mem_used -= sdslen(server.aof_rewrite_buf);
2176 }
2177
2178 /* Check if we are over the memory limit. */
2179 if (mem_used <= server.maxmemory) return REDIS_OK;
2180
2181 if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
2182 return REDIS_ERR; /* We need to free memory, but policy forbids. */
2183
2184 /* Compute how much memory we need to free. */
2185 mem_tofree = mem_used - server.maxmemory;
2186 mem_freed = 0;
2187 while (mem_freed < mem_tofree) {
2188 int j, k, keys_freed = 0;
2189
2190 for (j = 0; j < server.dbnum; j++) {
2191 long bestval = 0; /* just to prevent warning */
2192 sds bestkey = NULL;
2193 struct dictEntry *de;
2194 redisDb *db = server.db+j;
2195 dict *dict;
2196
2197 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2198 server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
2199 {
2200 dict = server.db[j].dict;
2201 } else {
2202 dict = server.db[j].expires;
2203 }
2204 if (dictSize(dict) == 0) continue;
2205
2206 /* volatile-random and allkeys-random policy */
2207 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
2208 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
2209 {
2210 de = dictGetRandomKey(dict);
2211 bestkey = dictGetKey(de);
2212 }
2213
2214 /* volatile-lru and allkeys-lru policy */
2215 else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2216 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2217 {
2218 for (k = 0; k < server.maxmemory_samples; k++) {
2219 sds thiskey;
2220 long thisval;
2221 robj *o;
2222
2223 de = dictGetRandomKey(dict);
2224 thiskey = dictGetKey(de);
2225 /* When policy is volatile-lru we need an additonal lookup
2226 * to locate the real key, as dict is set to db->expires. */
2227 if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2228 de = dictFind(db->dict, thiskey);
2229 o = dictGetVal(de);
2230 thisval = estimateObjectIdleTime(o);
2231
2232 /* Higher idle time is better candidate for deletion */
2233 if (bestkey == NULL || thisval > bestval) {
2234 bestkey = thiskey;
2235 bestval = thisval;
2236 }
2237 }
2238 }
2239
2240 /* volatile-ttl */
2241 else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
2242 for (k = 0; k < server.maxmemory_samples; k++) {
2243 sds thiskey;
2244 long thisval;
2245
2246 de = dictGetRandomKey(dict);
2247 thiskey = dictGetKey(de);
2248 thisval = (long) dictGetVal(de);
2249
2250 /* Expire sooner (minor expire unix timestamp) is better
2251 * candidate for deletion */
2252 if (bestkey == NULL || thisval < bestval) {
2253 bestkey = thiskey;
2254 bestval = thisval;
2255 }
2256 }
2257 }
2258
2259 /* Finally remove the selected key. */
2260 if (bestkey) {
2261 long long delta;
2262
2263 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
2264 propagateExpire(db,keyobj);
2265 /* We compute the amount of memory freed by dbDelete() alone.
2266 * It is possible that actually the memory needed to propagate
2267 * the DEL in AOF and replication link is greater than the one
2268 * we are freeing removing the key, but we can't account for
2269 * that otherwise we would never exit the loop.
2270 *
2271 * AOF and Output buffer memory will be freed eventually so
2272 * we only care about memory used by the key space. */
2273 delta = (long long) zmalloc_used_memory();
2274 dbDelete(db,keyobj);
2275 delta -= (long long) zmalloc_used_memory();
2276 mem_freed += delta;
2277 server.stat_evictedkeys++;
2278 decrRefCount(keyobj);
2279 keys_freed++;
2280
2281 /* When the memory to free starts to be big enough, we may
2282 * start spending so much time here that is impossible to
2283 * deliver data to the slaves fast enough, so we force the
2284 * transmission here inside the loop. */
2285 if (slaves) flushSlavesOutputBuffers();
2286 }
2287 }
2288 if (!keys_freed) return REDIS_ERR; /* nothing to free... */
2289 }
2290 return REDIS_OK;
2291 }
2292
2293 /* =================================== Main! ================================ */
2294
2295 #ifdef __linux__
/* Read /proc/sys/vm/overcommit_memory and return its content converted
 * with atoi(). Returns -1 when the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,64,procfile) != NULL) value = atoi(line);
    fclose(procfile);
    return value;
}
2309
2310 void linuxOvercommitMemoryWarning(void) {
2311 if (linuxOvercommitMemoryValue() == 0) {
2312 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
2313 }
2314 }
2315 #endif /* __linux__ */
2316
2317 void createPidFile(void) {
2318 /* Try to write the pid file in a best-effort way. */
2319 FILE *fp = fopen(server.pidfile,"w");
2320 if (fp) {
2321 fprintf(fp,"%d\n",(int)getpid());
2322 fclose(fp);
2323 }
2324 }
2325
/* Detach from the caller: fork (the process that gets a non-zero return
 * value from fork() exits with status 0), start a new session, and point
 * the three standard descriptors to /dev/null. */
void daemonize(void) {
    int devnull;

    if (fork() != 0) exit(0); /* parent exits */
    setsid(); /* create a new session */

    /* Every output goes to /dev/null. If Redis is daemonized but
     * the 'logfile' is set to 'stdout' in the configuration file
     * it will not log at all. */
    devnull = open("/dev/null", O_RDWR, 0);
    if (devnull == -1) return;

    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);
    /* Don't close the descriptor if it is one of the standard ones. */
    if (devnull > STDERR_FILENO) close(devnull);
}
2342
2343 void version() {
2344 printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d\n",
2345 REDIS_VERSION,
2346 redisGitSHA1(),
2347 atoi(redisGitDirty()) > 0,
2348 ZMALLOC_LIB,
2349 sizeof(long) == 4 ? 32 : 64);
2350 exit(0);
2351 }
2352
/* Print the command line help to stderr and exit with status 1. */
void usage() {
    static const char *help[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n"
    };
    size_t i;

    /* None of the lines contains a conversion specification, so they can
     * be written verbatim. */
    for (i = 0; i < sizeof(help)/sizeof(help[0]); i++)
        fputs(help[i],stderr);
    exit(1);
}
2367
2368 void redisAsciiArt(void) {
2369 #include "asciilogo.h"
2370 char *buf = zmalloc(1024*16);
2371
2372 snprintf(buf,1024*16,ascii_logo,
2373 REDIS_VERSION,
2374 redisGitSHA1(),
2375 strtol(redisGitDirty(),NULL,10) > 0,
2376 (sizeof(long) == 8) ? "64" : "32",
2377 server.cluster_enabled ? "cluster" : "stand alone",
2378 server.port,
2379 (long) getpid()
2380 );
2381 redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
2382 zfree(buf);
2383 }
2384
2385 static void sigtermHandler(int sig) {
2386 REDIS_NOTUSED(sig);
2387
2388 redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown...");
2389 server.shutdown_asap = 1;
2390 }
2391
2392 void setupSignalHandlers(void) {
2393 struct sigaction act;
2394
2395 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
2396 * Otherwise, sa_handler is used. */
2397 sigemptyset(&act.sa_mask);
2398 act.sa_flags = 0;
2399 act.sa_handler = sigtermHandler;
2400 sigaction(SIGTERM, &act, NULL);
2401
2402 #ifdef HAVE_BACKTRACE
2403 sigemptyset(&act.sa_mask);
2404 act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
2405 act.sa_sigaction = sigsegvHandler;
2406 sigaction(SIGSEGV, &act, NULL);
2407 sigaction(SIGBUS, &act, NULL);
2408 sigaction(SIGFPE, &act, NULL);
2409 sigaction(SIGILL, &act, NULL);
2410 #endif
2411 return;
2412 }
2413
2414 void memtest(size_t megabytes, int passes);
2415
2416 int main(int argc, char **argv) {
2417 long long start;
2418 struct timeval tv;
2419
2420 /* We need to initialize our libraries, and the server configuration. */
2421 zmalloc_enable_thread_safeness();
2422 srand(time(NULL)^getpid());
2423 gettimeofday(&tv,NULL);
2424 dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
2425 initServerConfig();
2426
2427 if (argc >= 2) {
2428 int j = 1; /* First option to parse in argv[] */
2429 sds options = sdsempty();
2430 char *configfile = NULL;
2431
2432 /* Handle special options --help and --version */
2433 if (strcmp(argv[1], "-v") == 0 ||
2434 strcmp(argv[1], "--version") == 0) version();
2435 if (strcmp(argv[1], "--help") == 0 ||
2436 strcmp(argv[1], "-h") == 0) usage();
2437 if (strcmp(argv[1], "--test-memory") == 0) {
2438 if (argc == 3) {
2439 memtest(atoi(argv[2]),50);
2440 exit(0);
2441 } else {
2442 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
2443 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
2444 exit(1);
2445 }
2446 }
2447
2448 /* First argument is the config file name? */
2449 if (argv[j][0] != '-' || argv[j][1] != '-')
2450 configfile = argv[j++];
2451 /* All the other options are parsed and conceptually appended to the
2452 * configuration file. For instance --port 6380 will generate the
2453 * string "port 6380\n" to be parsed after the actual file name
2454 * is parsed, if any. */
2455 while(j != argc) {
2456 if (argv[j][0] == '-' && argv[j][1] == '-') {
2457 /* Option name */
2458 if (sdslen(options)) options = sdscat(options,"\n");
2459 options = sdscat(options,argv[j]+2);
2460 options = sdscat(options," ");
2461 } else {
2462 /* Option argument */
2463 options = sdscatrepr(options,argv[j],strlen(argv[j]));
2464 options = sdscat(options," ");
2465 }
2466 j++;
2467 }
2468 resetServerSaveParams();
2469 loadServerConfig(configfile,options);
2470 sdsfree(options);
2471 } else {
2472 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
2473 }
2474 if (server.daemonize) daemonize();
2475 initServer();
2476 if (server.daemonize) createPidFile();
2477 redisAsciiArt();
2478 redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
2479 #ifdef __linux__
2480 linuxOvercommitMemoryWarning();
2481 #endif
2482 start = ustime();
2483 if (server.aof_state == REDIS_AOF_ON) {
2484 if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
2485 redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
2486 } else {
2487 if (rdbLoad(server.rdb_filename) == REDIS_OK) {
2488 redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
2489 (float)(ustime()-start)/1000000);
2490 } else if (errno != ENOENT) {
2491 redisLog(REDIS_WARNING,"Fatal error loading the DB. Exiting.");
2492 exit(1);
2493 }
2494 }
2495 if (server.ipfd > 0)
2496 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
2497 if (server.sofd > 0)
2498 redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
2499 aeSetBeforeSleepProc(server.el,beforeSleep);
2500 aeMain(server.el);
2501 aeDeleteEventLoop(server.el);
2502 return 0;
2503 }
2504
2505 /* The End */