]> git.saurik.com Git - redis.git/blob - src/redis.c
Improvements for: Redis timer, hashes rehashing, keys collection.
[redis.git] / src / redis.c
1 /*
2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "redis.h"
31 #include "slowlog.h"
32 #include "bio.h"
33
34 #include <time.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <ctype.h>
40 #include <stdarg.h>
41 #include <arpa/inet.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <sys/time.h>
45 #include <sys/resource.h>
46 #include <sys/uio.h>
47 #include <limits.h>
48 #include <float.h>
49 #include <math.h>
50 #include <sys/resource.h>
51 #include <sys/utsname.h>
52
53 /* Our shared "common" objects */
54
55 struct sharedObjectsStruct shared;
56
57 /* Global vars that are actually used as constants. The following double
58 * values are used for double on-disk serialization, and are initialized
59 * at runtime to avoid strange compiler optimizations. */
60
61 double R_Zero, R_PosInf, R_NegInf, R_Nan;
62
63 /*================================= Globals ================================= */
64
65 /* Global vars */
66 struct redisServer server; /* server global state */
67 struct redisCommand *commandTable;
68
69 /* Our command table.
70 *
71 * Every entry is composed of the following fields:
72 *
73 * name: a string representing the command name.
74 * function: pointer to the C function implementing the command.
75 * arity: number of arguments, it is possible to use -N to say >= N
76 * sflags: command flags as string. See below for a table of flags.
77 * flags: flags as bitmask. Computed by Redis using the 'sflags' field.
78 * get_keys_proc: an optional function to get key arguments from a command.
79 * This is only used when the following three fields are not
80 * enough to specify what arguments are keys.
81 * first_key_index: first argument that is a key
82 * last_key_index: last argument that is a key
83 * key_step: step to get all the keys from first to last argument. For instance
84 * in MSET the step is two since arguments are key,val,key,val,...
85 * microseconds: microseconds of total execution time for this command.
86 * calls: total number of calls of this command.
87 *
88 * The flags, microseconds and calls fields are computed by Redis and should
89 * always be set to zero.
90 *
91 * Command flags are expressed using strings where every character represents
92 * a flag. Later the populateCommandTable() function will take care of
93 * populating the real 'flags' field using these characters.
94 *
95 * This is the meaning of the flags:
96 *
97 * w: write command (may modify the key space).
98 * r: read command (will never modify the key space).
99 * m: may increase memory usage once called. Don't allow if out of memory.
100 * a: admin command, like SAVE or SHUTDOWN.
101 * p: Pub/Sub related command.
102 * f: force replication of this command, regardless of server.dirty.
103 * s: command not allowed in scripts.
104 * R: random command. Command is not deterministic, that is, the same command
105 * with the same arguments, with the same key space, may have different
106 * results. For instance SPOP and RANDOMKEY are two random commands.
107 * S: Sort command output array if called from script, so that the output
108 * is deterministic.
109 */
110 struct redisCommand redisCommandTable[] = {
111 {"get",getCommand,2,"r",0,NULL,1,1,1,0,0},
112 {"set",setCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
113 {"setnx",setnxCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
114 {"setex",setexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
115 {"psetex",psetexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
116 {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
117 {"strlen",strlenCommand,2,"r",0,NULL,1,1,1,0,0},
118 {"del",delCommand,-2,"w",0,noPreloadGetKeys,1,-1,1,0,0},
119 {"exists",existsCommand,2,"r",0,NULL,1,1,1,0,0},
120 {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
121 {"getbit",getbitCommand,3,"r",0,NULL,1,1,1,0,0},
122 {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
123 {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
124 {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
125 {"incr",incrCommand,2,"wm",0,NULL,1,1,1,0,0},
126 {"decr",decrCommand,2,"wm",0,NULL,1,1,1,0,0},
127 {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
128 {"rpush",rpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
129 {"lpush",lpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
130 {"rpushx",rpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
131 {"lpushx",lpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
132 {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
133 {"rpop",rpopCommand,2,"w",0,NULL,1,1,1,0,0},
134 {"lpop",lpopCommand,2,"w",0,NULL,1,1,1,0,0},
135 {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
136 {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
137 {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
138 {"llen",llenCommand,2,"r",0,NULL,1,1,1,0,0},
139 {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
140 {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
141 {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
142 {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
143 {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
144 {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
145 {"sadd",saddCommand,-3,"wm",0,NULL,1,1,1,0,0},
146 {"srem",sremCommand,-3,"w",0,NULL,1,1,1,0,0},
147 {"smove",smoveCommand,4,"w",0,NULL,1,2,1,0,0},
148 {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
149 {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
150 {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
151 {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
152 {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
153 {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
154 {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
155 {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
156 {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
157 {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
158 {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
159 {"zadd",zaddCommand,-4,"wm",0,NULL,1,1,1,0,0},
160 {"zincrby",zincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
161 {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0},
162 {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
163 {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
164 {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
165 {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
166 {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
167 {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
168 {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
169 {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0},
170 {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
171 {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0},
172 {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0},
173 {"zrank",zrankCommand,3,"r",0,NULL,1,1,1,0,0},
174 {"zrevrank",zrevrankCommand,3,"r",0,NULL,1,1,1,0,0},
175 {"hset",hsetCommand,4,"wm",0,NULL,1,1,1,0,0},
176 {"hsetnx",hsetnxCommand,4,"wm",0,NULL,1,1,1,0,0},
177 {"hget",hgetCommand,3,"r",0,NULL,1,1,1,0,0},
178 {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
179 {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
180 {"hincrby",hincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
181 {"hincrbyfloat",hincrbyfloatCommand,4,"wm",0,NULL,1,1,1,0,0},
182 {"hdel",hdelCommand,-3,"w",0,NULL,1,1,1,0,0},
183 {"hlen",hlenCommand,2,"r",0,NULL,1,1,1,0,0},
184 {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
185 {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
186 {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
187 {"hexists",hexistsCommand,3,"r",0,NULL,1,1,1,0,0},
188 {"incrby",incrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
189 {"decrby",decrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
190 {"incrbyfloat",incrbyfloatCommand,3,"wm",0,NULL,1,1,1,0,0},
191 {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
192 {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
193 {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
194 {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
195 {"select",selectCommand,2,"r",0,NULL,0,0,0,0,0},
196 {"move",moveCommand,3,"w",0,NULL,1,1,1,0,0},
197 {"rename",renameCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
198 {"renamenx",renamenxCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
199 {"expire",expireCommand,3,"w",0,NULL,1,1,1,0,0},
200 {"expireat",expireatCommand,3,"w",0,NULL,1,1,1,0,0},
201 {"pexpire",pexpireCommand,3,"w",0,NULL,1,1,1,0,0},
202 {"pexpireat",pexpireatCommand,3,"w",0,NULL,1,1,1,0,0},
203 {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
204 {"dbsize",dbsizeCommand,1,"r",0,NULL,0,0,0,0,0},
205 {"auth",authCommand,2,"rs",0,NULL,0,0,0,0,0},
206 {"ping",pingCommand,1,"r",0,NULL,0,0,0,0,0},
207 {"echo",echoCommand,2,"r",0,NULL,0,0,0,0,0},
208 {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
209 {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
210 {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
211 {"shutdown",shutdownCommand,-1,"ar",0,NULL,0,0,0,0,0},
212 {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0},
213 {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0},
214 {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0},
215 {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0},
216 {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
217 {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
218 {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
219 {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
220 {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0},
221 {"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0},
222 {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
223 {"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0},
224 {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0},
225 {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0},
226 {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0},
227 {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
228 {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0},
229 {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
230 {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
231 {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
232 {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
233 {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0},
234 {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0},
235 {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0},
236 {"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0},
237 {"migrate",migrateCommand,6,"aw",0,NULL,0,0,0,0,0},
238 {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
239 {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0},
240 {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0},
241 {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
242 {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
243 {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
244 {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
245 {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0}
246 };
247
248 /*============================ Utility functions ============================ */
249
250 /* Low level logging. To use only for very big messages, otherwise
251 * redisLog() is to prefer. */
252 void redisLogRaw(int level, const char *msg) {
253 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
254 const char *c = ".-*#";
255 FILE *fp;
256 char buf[64];
257 int rawmode = (level & REDIS_LOG_RAW);
258
259 level &= 0xff; /* clear flags */
260 if (level < server.verbosity) return;
261
262 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
263 if (!fp) return;
264
265 if (rawmode) {
266 fprintf(fp,"%s",msg);
267 } else {
268 int off;
269 struct timeval tv;
270
271 gettimeofday(&tv,NULL);
272 off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
273 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
274 fprintf(fp,"[%d] %s %c %s\n",(int)getpid(),buf,c[level],msg);
275 }
276 fflush(fp);
277
278 if (server.logfile) fclose(fp);
279
280 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
281 }
282
283 /* Like redisLogRaw() but with printf-alike support. This is the funciton that
284 * is used across the code. The raw version is only used in order to dump
285 * the INFO output on crash. */
286 void redisLog(int level, const char *fmt, ...) {
287 va_list ap;
288 char msg[REDIS_MAX_LOGMSG_LEN];
289
290 if ((level&0xff) < server.verbosity) return;
291
292 va_start(ap, fmt);
293 vsnprintf(msg, sizeof(msg), fmt, ap);
294 va_end(ap);
295
296 redisLogRaw(level,msg);
297 }
298
299 /* Log a fixed message without printf-alike capabilities, in a way that is
300 * safe to call from a signal handler.
301 *
302 * We actually use this only for signals that are not fatal from the point
303 * of view of Redis. Signals that are going to kill the server anyway and
304 * where we need printf-alike features are served by redisLog(). */
305 void redisLogFromHandler(int level, const char *msg) {
306 int fd;
307 char buf[64];
308
309 if ((level&0xff) < server.verbosity ||
310 (server.logfile == NULL && server.daemonize)) return;
311 fd = server.logfile ?
312 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644) :
313 STDOUT_FILENO;
314 if (fd == -1) return;
315 ll2string(buf,sizeof(buf),getpid());
316 if (write(fd,"[",1) == -1) goto err;
317 if (write(fd,buf,strlen(buf)) == -1) goto err;
318 if (write(fd," | signal handler] (",20) == -1) goto err;
319 ll2string(buf,sizeof(buf),time(NULL));
320 if (write(fd,buf,strlen(buf)) == -1) goto err;
321 if (write(fd,") ",2) == -1) goto err;
322 if (write(fd,msg,strlen(msg)) == -1) goto err;
323 if (write(fd,"\n",1) == -1) goto err;
324 err:
325 if (server.logfile) close(fd);
326 }
327
328 /* Redis generally does not try to recover from out of memory conditions
329 * when allocating objects or strings, it is not clear if it will be possible
330 * to report this condition to the client since the networking layer itself
331 * is based on heap allocation for send buffers, so we simply abort.
332 * At least the code will be simpler to read... */
333 void oom(const char *msg) {
334 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
335 sleep(1);
336 abort();
337 }
338
/* Return the current UNIX time expressed in microseconds, as reported by
 * gettimeofday(). */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    /* Widen before multiplying so the product is computed as long long. */
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
349
/* Return the current UNIX time expressed in milliseconds (ustime() with the
 * sub-millisecond part truncated). */
long long mstime(void) {
    long long usec = ustime();

    return usec/1000;
}
354
/* Terminate a child process after an RDB dump or an AOF rewrite.
 *
 * _exit() is used instead of exit() because the latter may interact with
 * the same file objects used by the parent process. The exception is a
 * build with COVERAGE_TEST defined, where the normal exit() is used so that
 * the right coverage information is obtained. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
366
367 /*====================== Hash table type implementation ==================== */
368
369 /* This is a hash table type that uses the SDS dynamic strings library as
370 * keys and redis objects as values (objects can hold SDS strings,
371 * lists, sets). */
372
/* Dict destructor callback: release 'val' with zfree(). 'privdata' is
 * ignored. */
void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    zfree(val);
}
378
379 void dictListDestructor(void *privdata, void *val)
380 {
381 DICT_NOTUSED(privdata);
382 listRelease((list*)val);
383 }
384
385 int dictSdsKeyCompare(void *privdata, const void *key1,
386 const void *key2)
387 {
388 int l1,l2;
389 DICT_NOTUSED(privdata);
390
391 l1 = sdslen((sds)key1);
392 l2 = sdslen((sds)key2);
393 if (l1 != l2) return 0;
394 return memcmp(key1, key2, l1) == 0;
395 }
396
/* Case insensitive key comparison, used for the command lookup table.
 * Returns 1 when strcasecmp() reports the two keys as equal, 0 otherwise.
 * 'privdata' is ignored. */
int dictSdsKeyCaseCompare(void *privdata, const void *key1,
        const void *key2)
{
    int differ;

    DICT_NOTUSED(privdata);

    differ = strcasecmp(key1, key2);
    return differ == 0;
}
405
/* Dict destructor callback for Redis objects: drops one reference from
 * 'val'. A NULL value is tolerated and left alone (values of swapped out
 * keys are set to NULL). 'privdata' is ignored. */
void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
413
/* Dict destructor callback for sds strings: releases 'val' with sdsfree().
 * 'privdata' is ignored. */
void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    sdsfree(val);
}
420
421 int dictObjKeyCompare(void *privdata, const void *key1,
422 const void *key2)
423 {
424 const robj *o1 = key1, *o2 = key2;
425 return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
426 }
427
428 unsigned int dictObjHash(const void *key) {
429 const robj *o = key;
430 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
431 }
432
/* Dict hash callback for sds keys: hashes exactly sdslen(key) bytes. */
unsigned int dictSdsHash(const void *key) {
    const unsigned char *bytes = (unsigned char*)key;

    return dictGenHashFunction(bytes, sdslen((char*)key));
}
436
/* Dict hash callback for sds keys, computed with dictGenCaseHashFunction()
 * over sdslen(key) bytes. It is the hash paired with the case insensitive
 * key comparison in the command table dict type. */
unsigned int dictSdsCaseHash(const void *key) {
    const unsigned char *bytes = (unsigned char*)key;

    return dictGenCaseHashFunction(bytes, sdslen((char*)key));
}
440
441 int dictEncObjKeyCompare(void *privdata, const void *key1,
442 const void *key2)
443 {
444 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
445 int cmp;
446
447 if (o1->encoding == REDIS_ENCODING_INT &&
448 o2->encoding == REDIS_ENCODING_INT)
449 return o1->ptr == o2->ptr;
450
451 o1 = getDecodedObject(o1);
452 o2 = getDecodedObject(o2);
453 cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
454 decrRefCount(o1);
455 decrRefCount(o2);
456 return cmp;
457 }
458
459 unsigned int dictEncObjHash(const void *key) {
460 robj *o = (robj*) key;
461
462 if (o->encoding == REDIS_ENCODING_RAW) {
463 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
464 } else {
465 if (o->encoding == REDIS_ENCODING_INT) {
466 char buf[32];
467 int len;
468
469 len = ll2string(buf,32,(long)o->ptr);
470 return dictGenHashFunction((unsigned char*)buf, len);
471 } else {
472 unsigned int hash;
473
474 o = getDecodedObject(o);
475 hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
476 decrRefCount(o);
477 return hash;
478 }
479 }
480 }
481
482 /* Sets type hash table */
483 dictType setDictType = {
484 dictEncObjHash, /* hash function */
485 NULL, /* key dup */
486 NULL, /* val dup */
487 dictEncObjKeyCompare, /* key compare */
488 dictRedisObjectDestructor, /* key destructor */
489 NULL /* val destructor */
490 };
491
492 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
493 dictType zsetDictType = {
494 dictEncObjHash, /* hash function */
495 NULL, /* key dup */
496 NULL, /* val dup */
497 dictEncObjKeyCompare, /* key compare */
498 dictRedisObjectDestructor, /* key destructor */
499 NULL /* val destructor */
500 };
501
502 /* Db->dict, keys are sds strings, vals are Redis objects. */
503 dictType dbDictType = {
504 dictSdsHash, /* hash function */
505 NULL, /* key dup */
506 NULL, /* val dup */
507 dictSdsKeyCompare, /* key compare */
508 dictSdsDestructor, /* key destructor */
509 dictRedisObjectDestructor /* val destructor */
510 };
511
512 /* Db->expires */
513 dictType keyptrDictType = {
514 dictSdsHash, /* hash function */
515 NULL, /* key dup */
516 NULL, /* val dup */
517 dictSdsKeyCompare, /* key compare */
518 NULL, /* key destructor */
519 NULL /* val destructor */
520 };
521
522 /* Command table. sds string -> command struct pointer. */
523 dictType commandTableDictType = {
524 dictSdsCaseHash, /* hash function */
525 NULL, /* key dup */
526 NULL, /* val dup */
527 dictSdsKeyCaseCompare, /* key compare */
528 dictSdsDestructor, /* key destructor */
529 NULL /* val destructor */
530 };
531
532 /* Hash type hash table (note that small hashes are represented with zimpaps) */
533 dictType hashDictType = {
534 dictEncObjHash, /* hash function */
535 NULL, /* key dup */
536 NULL, /* val dup */
537 dictEncObjKeyCompare, /* key compare */
538 dictRedisObjectDestructor, /* key destructor */
539 dictRedisObjectDestructor /* val destructor */
540 };
541
542 /* Keylist hash table type has unencoded redis objects as keys and
543 * lists as values. It's used for blocking operations (BLPOP) and to
544 * map swapped keys to a list of clients waiting for this keys to be loaded. */
545 dictType keylistDictType = {
546 dictObjHash, /* hash function */
547 NULL, /* key dup */
548 NULL, /* val dup */
549 dictObjKeyCompare, /* key compare */
550 dictRedisObjectDestructor, /* key destructor */
551 dictListDestructor /* val destructor */
552 };
553
554 int htNeedsResize(dict *dict) {
555 long long size, used;
556
557 size = dictSlots(dict);
558 used = dictSize(dict);
559 return (size && used && size > DICT_HT_INITIAL_SIZE &&
560 (used*100/size < REDIS_HT_MINFILL));
561 }
562
563 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
564 * we resize the hash table to save memory */
565 void tryResizeHashTables(void) {
566 int j;
567
568 for (j = 0; j < server.dbnum; j++) {
569 if (htNeedsResize(server.db[j].dict))
570 dictResize(server.db[j].dict);
571 if (htNeedsResize(server.db[j].expires))
572 dictResize(server.db[j].expires);
573 }
574 }
575
576 /* Our hash table implementation performs rehashing incrementally while
577 * we write/read from the hash table. Still if the server is idle, the hash
578 * table will use two tables for a long time. So we try to use 1 millisecond
579 * of CPU time at every serverCron() loop in order to rehash some key. */
580 void incrementallyRehash(void) {
581 int j;
582
583 for (j = 0; j < server.dbnum; j++) {
584 /* Keys dictionary */
585 if (dictIsRehashing(server.db[j].dict)) {
586 dictRehashMilliseconds(server.db[j].dict,1);
587 break; /* already used our millisecond for this loop... */
588 }
589 /* Expires */
590 if (dictIsRehashing(server.db[j].expires)) {
591 dictRehashMilliseconds(server.db[j].expires,1);
592 break; /* already used our millisecond for this loop... */
593 }
594 }
595 }
596
597 /* This function is called once a background process of some kind terminates,
598 * as we want to avoid resizing the hash tables when there is a child in order
599 * to play well with copy-on-write (otherwise when a resize happens lots of
600 * memory pages are copied). The goal of this function is to update the ability
601 * for dict.c to resize the hash tables accordingly to the fact we have o not
602 * running childs. */
603 void updateDictResizePolicy(void) {
604 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
605 dictEnableResize();
606 else
607 dictDisableResize();
608 }
609
610 /* ======================= Cron: called every 100 ms ======================== */
611
612 /* Try to expire a few timed out keys. The algorithm used is adaptive and
613 * will use few CPU cycles if there are few expiring keys, otherwise
614 * it will get more aggressive to avoid that too much memory is used by
615 * keys that can be removed from the keyspace. */
616 void activeExpireCycle(void) {
617 int j;
618 long long start = ustime(), timelimit;
619
620 /* We can use at max REDIS_EXPIRELOOKUPS_TIME_PERC percentage of CPU time
621 * per iteration. Since this function gets called with a frequency of
622 * REDIS_HZ times per second, the following is the max amount of
623 * microseconds we can spend in this function. */
624 timelimit = 1000000*REDIS_EXPIRELOOKUPS_TIME_PERC/REDIS_HZ/100;
625 if (timelimit <= 0) timelimit = 1;
626
627 for (j = 0; j < server.dbnum; j++) {
628 int expired, iteration = 0;
629 redisDb *db = server.db+j;
630
631 /* Continue to expire if at the end of the cycle more than 25%
632 * of the keys were expired. */
633 do {
634 unsigned long num = dictSize(db->expires);
635 unsigned long slots = dictSlots(db->expires);
636 long long now = mstime();
637
638 /* When there are less than 1% filled slots getting random
639 * keys is expensive, so stop here waiting for better times...
640 * The dictionary will be resized asap. */
641 if (num && slots > DICT_HT_INITIAL_SIZE &&
642 (num*100/slots < 1)) break;
643
644 expired = 0;
645 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
646 num = REDIS_EXPIRELOOKUPS_PER_CRON;
647 while (num--) {
648 dictEntry *de;
649 long long t;
650
651 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
652 t = dictGetSignedIntegerVal(de);
653 if (now > t) {
654 sds key = dictGetKey(de);
655 robj *keyobj = createStringObject(key,sdslen(key));
656
657 propagateExpire(db,keyobj);
658 dbDelete(db,keyobj);
659 decrRefCount(keyobj);
660 expired++;
661 server.stat_expiredkeys++;
662 }
663 }
664 /* We can't block forever here even if there are many keys to
665 * expire. So after a given amount of milliseconds return to the
666 * caller waiting for the other active expire cycle. */
667 iteration++;
668 if ((iteration & 0xff) == 0 && /* Check once every 255 iterations */
669 (ustime()-start) > timelimit) return;
670 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
671 }
672 }
673
674 void updateLRUClock(void) {
675 server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) &
676 REDIS_LRU_CLOCK_MAX;
677 }
678
679
680 /* Add a sample to the operations per second array of samples. */
681 void trackOperationsPerSecond(void) {
682 long long t = mstime() - server.ops_sec_last_sample_time;
683 long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
684 long long ops_sec;
685
686 ops_sec = t > 0 ? (ops*1000/t) : 0;
687
688 server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
689 server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
690 server.ops_sec_last_sample_time = mstime();
691 server.ops_sec_last_sample_ops = server.stat_numcommands;
692 }
693
694 /* Return the mean of all the samples. */
695 long long getOperationsPerSecond(void) {
696 int j;
697 long long sum = 0;
698
699 for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
700 sum += server.ops_sec_samples[j];
701 return sum / REDIS_OPS_SEC_SAMPLES;
702 }
703
704 /* Check for timeouts. Returns non-zero if the client was terminated */
705 int clientsCronHandleTimeout(redisClient *c) {
706 time_t now = server.unixtime;
707
708 if (server.maxidletime &&
709 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
710 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
711 !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
712 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
713 listLength(c->pubsub_patterns) == 0 &&
714 (now - c->lastinteraction > server.maxidletime))
715 {
716 redisLog(REDIS_VERBOSE,"Closing idle client");
717 freeClient(c);
718 return 1;
719 } else if (c->flags & REDIS_BLOCKED) {
720 if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
721 addReply(c,shared.nullmultibulk);
722 unblockClientWaitingData(c);
723 }
724 }
725 return 0;
726 }
727
728 /* The client query buffer is an sds.c string that can end with a lot of
729 * free space not used, this function reclaims space if needed.
730 *
731 * The funciton always returns 0 as it never terminates the client. */
732 int clientsCronResizeQueryBuffer(redisClient *c) {
733 size_t querybuf_size = sdsAllocSize(c->querybuf);
734 time_t idletime = server.unixtime - c->lastinteraction;
735
736 /* There are two conditions to resize the query buffer:
737 * 1) Query buffer is > BIG_ARG and too big for latest peak.
738 * 2) Client is inactive and the buffer is bigger than 1k. */
739 if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
740 (querybuf_size/(c->querybuf_peak+1)) > 2) ||
741 (querybuf_size > 1024 && idletime > 2))
742 {
743 /* Only resize the query buffer if it is actually wasting space. */
744 if (sdsavail(c->querybuf) > 1024) {
745 c->querybuf = sdsRemoveFreeSpace(c->querybuf);
746 }
747 }
748 /* Reset the peak again to capture the peak memory usage in the next
749 * cycle. */
750 c->querybuf_peak = 0;
751 return 0;
752 }
753
754 void clientsCron(void) {
755 /* Make sure to process at least 1/(REDIS_HZ*10) of clients per call.
756 * Since this function is called REDIS_HZ times per second we are sure that
757 * in the worst case we process all the clients in 10 seconds.
758 * In normal conditions (a reasonable number of clients) we process
759 * all the clients in a shorter time. */
760 int numclients = listLength(server.clients);
761 int iterations = numclients/(REDIS_HZ*10);
762
763 if (iterations < 50)
764 iterations = (numclients < 50) ? numclients : 50;
765 while(listLength(server.clients) && iterations--) {
766 redisClient *c;
767 listNode *head;
768
769 /* Rotate the list, take the current head, process.
770 * This way if the client must be removed from the list it's the
771 * first element and we don't incur into O(N) computation. */
772 listRotate(server.clients);
773 head = listFirst(server.clients);
774 c = listNodeValue(head);
775 /* The following functions do different service checks on the client.
776 * The protocol is that they return non-zero if the client was
777 * terminated. */
778 if (clientsCronHandleTimeout(c)) continue;
779 if (clientsCronResizeQueryBuffer(c)) continue;
780 }
781 }
782
783 /* This is our timer interrupt, called REDIS_HZ times per second.
784 * Here is where we do a number of things that need to be done asynchronously.
785 * For instance:
786 *
787 * - Active expired keys collection (it is also performed in a lazy way on
788 * lookup).
789 * - Software watchdog.
790 * - Update some statistics.
791 * - Incremental rehashing of the DBs hash tables.
792 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
793 * - Clients timeout of different kinds.
794 * - Replication reconnection.
795 * - Many more...
796 *
797 * Everything directly called here will be called REDIS_HZ times per second,
798 * so in order to throttle execution of things we want to do less frequently
799 * a macro is used: run_with_period(milliseconds) { .... }
800 */
801
/* Using the following macro you can run code inside serverCron() with the
 * specified period, expressed in milliseconds:
 *
 *   run_with_period(100) trackOperationsPerSecond();
 *   run_with_period(5000) { ... }
 *
 * It expands to an 'if' that tests the local variable 'loops', which must be
 * in scope at the point of use. The actual resolution depends on REDIS_HZ.
 *
 * A period not longer than one cron tick (1000/REDIS_HZ milliseconds) runs
 * at every call. Without that first test the divisor
 * (_ms_)/(1000/REDIS_HZ) would be zero for such periods, and the modulo
 * would be undefined behavior.
 *
 * Since the expansion is a bare 'if', do not follow the guarded statement
 * with an 'else'. */
#define run_with_period(_ms_) \
    if (((_ms_) <= 1000/REDIS_HZ) || !(loops % ((_ms_)/(1000/REDIS_HZ))))
806
807 int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
808 int j, loops = server.cronloops;
809 REDIS_NOTUSED(eventLoop);
810 REDIS_NOTUSED(id);
811 REDIS_NOTUSED(clientData);
812
813 /* Software watchdog: deliver the SIGALRM that will reach the signal
814 * handler if we don't return here fast enough. */
815 if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
816
817 /* We take a cached value of the unix time in the global state because
818 * with virtual memory and aging there is to store the current time
819 * in objects at every object access, and accuracy is not needed.
820 * To access a global var is faster than calling time(NULL) */
821 server.unixtime = time(NULL);
822
823 run_with_period(100) trackOperationsPerSecond();
824
825 /* We have just 22 bits per object for LRU information.
826 * So we use an (eventually wrapping) LRU clock with 10 seconds resolution.
827 * 2^22 bits with 10 seconds resoluton is more or less 1.5 years.
828 *
829 * Note that even if this will wrap after 1.5 years it's not a problem,
830 * everything will still work but just some object will appear younger
831 * to Redis. But for this to happen a given object should never be touched
832 * for 1.5 years.
833 *
834 * Note that you can change the resolution altering the
835 * REDIS_LRU_CLOCK_RESOLUTION define.
836 */
837 updateLRUClock();
838
839 /* Record the max memory used since the server was started. */
840 if (zmalloc_used_memory() > server.stat_peak_memory)
841 server.stat_peak_memory = zmalloc_used_memory();
842
843 /* We received a SIGTERM, shutting down here in a safe way, as it is
844 * not ok doing so inside the signal handler. */
845 if (server.shutdown_asap) {
846 if (prepareForShutdown(0) == REDIS_OK) exit(0);
847 redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
848 }
849
850 /* Show some info about non-empty databases */
851 run_with_period(5000) {
852 for (j = 0; j < server.dbnum; j++) {
853 long long size, used, vkeys;
854
855 size = dictSlots(server.db[j].dict);
856 used = dictSize(server.db[j].dict);
857 vkeys = dictSize(server.db[j].expires);
858 if (used || vkeys) {
859 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
860 /* dictPrintStats(server.dict); */
861 }
862 }
863 }
864
865 /* We don't want to resize the hash tables while a bacground saving
866 * is in progress: the saving child is created using fork() that is
867 * implemented with a copy-on-write semantic in most modern systems, so
868 * if we resize the HT while there is the saving child at work actually
869 * a lot of memory movements in the parent will cause a lot of pages
870 * copied. */
871 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
872 tryResizeHashTables();
873 if (server.activerehashing) incrementallyRehash();
874 }
875
876 /* Show information about connected clients */
877 run_with_period(5000) {
878 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
879 listLength(server.clients)-listLength(server.slaves),
880 listLength(server.slaves),
881 zmalloc_used_memory());
882 }
883
884 /* We need to do a few operations on clients asynchronously. */
885 clientsCron();
886
887 /* Start a scheduled AOF rewrite if this was requested by the user while
888 * a BGSAVE was in progress. */
889 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
890 server.aof_rewrite_scheduled)
891 {
892 rewriteAppendOnlyFileBackground();
893 }
894
895 /* Check if a background saving or AOF rewrite in progress terminated. */
896 if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
897 int statloc;
898 pid_t pid;
899
900 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
901 int exitcode = WEXITSTATUS(statloc);
902 int bysignal = 0;
903
904 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
905
906 if (pid == server.rdb_child_pid) {
907 backgroundSaveDoneHandler(exitcode,bysignal);
908 } else {
909 backgroundRewriteDoneHandler(exitcode,bysignal);
910 }
911 updateDictResizePolicy();
912 }
913 } else {
914 /* If there is not a background saving/rewrite in progress check if
915 * we have to save/rewrite now */
916 for (j = 0; j < server.saveparamslen; j++) {
917 struct saveparam *sp = server.saveparams+j;
918
919 if (server.dirty >= sp->changes &&
920 server.unixtime-server.lastsave > sp->seconds) {
921 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
922 sp->changes, sp->seconds);
923 rdbSaveBackground(server.rdb_filename);
924 break;
925 }
926 }
927
928 /* Trigger an AOF rewrite if needed */
929 if (server.rdb_child_pid == -1 &&
930 server.aof_child_pid == -1 &&
931 server.aof_rewrite_perc &&
932 server.aof_current_size > server.aof_rewrite_min_size)
933 {
934 long long base = server.aof_rewrite_base_size ?
935 server.aof_rewrite_base_size : 1;
936 long long growth = (server.aof_current_size*100/base) - 100;
937 if (growth >= server.aof_rewrite_perc) {
938 redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
939 rewriteAppendOnlyFileBackground();
940 }
941 }
942 }
943
944
945 /* If we postponed an AOF buffer flush, let's try to do it every time the
946 * cron function is called. */
947 if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
948
949 /* Expire a few keys per cycle, only if this is a master.
950 * On slaves we wait for DEL operations synthesized by the master
951 * in order to guarantee a strict consistency. */
952 if (server.masterhost == NULL) activeExpireCycle();
953
954 /* Close clients that need to be closed asynchronous */
955 freeClientsInAsyncFreeQueue();
956
957 /* Replication cron function -- used to reconnect to master and
958 * to detect transfer failures. */
959 run_with_period(1000) replicationCron();
960
961 server.cronloops++;
962 return 1000/REDIS_HZ;
963 }
964
965 /* This function gets called every time Redis is entering the
966 * main loop of the event driven library, that is, before to sleep
967 * for ready file descriptors. */
968 void beforeSleep(struct aeEventLoop *eventLoop) {
969 REDIS_NOTUSED(eventLoop);
970 listNode *ln;
971 redisClient *c;
972
973 /* Try to process pending commands for clients that were just unblocked. */
974 while (listLength(server.unblocked_clients)) {
975 ln = listFirst(server.unblocked_clients);
976 redisAssert(ln != NULL);
977 c = ln->value;
978 listDelNode(server.unblocked_clients,ln);
979 c->flags &= ~REDIS_UNBLOCKED;
980
981 /* Process remaining data in the input buffer. */
982 if (c->querybuf && sdslen(c->querybuf) > 0) {
983 server.current_client = c;
984 processInputBuffer(c);
985 server.current_client = NULL;
986 }
987 }
988
989 /* Write the AOF buffer on disk */
990 flushAppendOnlyFile(0);
991 }
992
993 /* =========================== Server initialization ======================== */
994
995 void createSharedObjects(void) {
996 int j;
997
998 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
999 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1000 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
1001 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1002 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1003 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1004 shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
1005 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1006 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1007 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
1008 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1009 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
1010 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1011 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1012 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1013 "-ERR no such key\r\n"));
1014 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1015 "-ERR syntax error\r\n"));
1016 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1017 "-ERR source and destination objects are the same\r\n"));
1018 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1019 "-ERR index out of range\r\n"));
1020 shared.noscripterr = createObject(REDIS_STRING,sdsnew(
1021 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
1022 shared.loadingerr = createObject(REDIS_STRING,sdsnew(
1023 "-LOADING Redis is loading the dataset in memory\r\n"));
1024 shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
1025 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
1026 shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
1027 "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
1028 shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
1029 "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
1030 shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
1031 "-READONLY You can't write against a read only slave.\r\n"));
1032 shared.oomerr = createObject(REDIS_STRING,sdsnew(
1033 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
1034 shared.space = createObject(REDIS_STRING,sdsnew(" "));
1035 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1036 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1037
1038 for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
1039 shared.select[j] = createObject(REDIS_STRING,
1040 sdscatprintf(sdsempty(),"select %d\r\n", j));
1041 }
1042 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1043 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1044 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1045 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1046 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1047 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1048 shared.del = createStringObject("DEL",3);
1049 shared.rpop = createStringObject("RPOP",4);
1050 shared.lpop = createStringObject("LPOP",4);
1051 for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
1052 shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
1053 shared.integers[j]->encoding = REDIS_ENCODING_INT;
1054 }
1055 for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
1056 shared.mbulkhdr[j] = createObject(REDIS_STRING,
1057 sdscatprintf(sdsempty(),"*%d\r\n",j));
1058 shared.bulkhdr[j] = createObject(REDIS_STRING,
1059 sdscatprintf(sdsempty(),"$%d\r\n",j));
1060 }
1061 }
1062
1063 void initServerConfig() {
1064 getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
1065 server.runid[REDIS_RUN_ID_SIZE] = '\0';
1066 server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
1067 server.port = REDIS_SERVERPORT;
1068 server.bindaddr = NULL;
1069 server.unixsocket = NULL;
1070 server.unixsocketperm = 0;
1071 server.ipfd = -1;
1072 server.sofd = -1;
1073 server.dbnum = REDIS_DEFAULT_DBNUM;
1074 server.verbosity = REDIS_NOTICE;
1075 server.maxidletime = REDIS_MAXIDLETIME;
1076 server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
1077 server.saveparams = NULL;
1078 server.loading = 0;
1079 server.logfile = NULL; /* NULL = log on standard output */
1080 server.syslog_enabled = 0;
1081 server.syslog_ident = zstrdup("redis");
1082 server.syslog_facility = LOG_LOCAL0;
1083 server.daemonize = 0;
1084 server.aof_state = REDIS_AOF_OFF;
1085 server.aof_fsync = AOF_FSYNC_EVERYSEC;
1086 server.aof_no_fsync_on_rewrite = 0;
1087 server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
1088 server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
1089 server.aof_rewrite_base_size = 0;
1090 server.aof_rewrite_scheduled = 0;
1091 server.aof_last_fsync = time(NULL);
1092 server.aof_delayed_fsync = 0;
1093 server.aof_fd = -1;
1094 server.aof_selected_db = -1; /* Make sure the first time will not match */
1095 server.aof_flush_postponed_start = 0;
1096 server.pidfile = zstrdup("/var/run/redis.pid");
1097 server.rdb_filename = zstrdup("dump.rdb");
1098 server.aof_filename = zstrdup("appendonly.aof");
1099 server.requirepass = NULL;
1100 server.rdb_compression = 1;
1101 server.rdb_checksum = 1;
1102 server.activerehashing = 1;
1103 server.maxclients = REDIS_MAX_CLIENTS;
1104 server.bpop_blocked_clients = 0;
1105 server.maxmemory = 0;
1106 server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
1107 server.maxmemory_samples = 3;
1108 server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
1109 server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
1110 server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
1111 server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
1112 server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
1113 server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
1114 server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
1115 server.shutdown_asap = 0;
1116 server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
1117 server.repl_timeout = REDIS_REPL_TIMEOUT;
1118 server.lua_caller = NULL;
1119 server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
1120 server.lua_client = NULL;
1121 server.lua_timedout = 0;
1122
1123 updateLRUClock();
1124 resetServerSaveParams();
1125
1126 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1127 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1128 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1129 /* Replication related */
1130 server.masterauth = NULL;
1131 server.masterhost = NULL;
1132 server.masterport = 6379;
1133 server.master = NULL;
1134 server.repl_state = REDIS_REPL_NONE;
1135 server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
1136 server.repl_serve_stale_data = 1;
1137 server.repl_slave_ro = 1;
1138 server.repl_down_since = time(NULL);
1139
1140 /* Client output buffer limits */
1141 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
1142 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_bytes = 0;
1143 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_seconds = 0;
1144 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].hard_limit_bytes = 1024*1024*256;
1145 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_bytes = 1024*1024*64;
1146 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_seconds = 60;
1147 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].hard_limit_bytes = 1024*1024*32;
1148 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_bytes = 1024*1024*8;
1149 server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_seconds = 60;
1150
1151 /* Double constants initialization */
1152 R_Zero = 0.0;
1153 R_PosInf = 1.0/R_Zero;
1154 R_NegInf = -1.0/R_Zero;
1155 R_Nan = R_Zero/R_Zero;
1156
1157 /* Command table -- we intiialize it here as it is part of the
1158 * initial configuration, since command names may be changed via
1159 * redis.conf using the rename-command directive. */
1160 server.commands = dictCreate(&commandTableDictType,NULL);
1161 populateCommandTable();
1162 server.delCommand = lookupCommandByCString("del");
1163 server.multiCommand = lookupCommandByCString("multi");
1164 server.lpushCommand = lookupCommandByCString("lpush");
1165
1166 /* Slow log */
1167 server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
1168 server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
1169
1170 /* Debugging */
1171 server.assert_failed = "<no assertion failed>";
1172 server.assert_file = "<no file>";
1173 server.assert_line = 0;
1174 server.bug_report_start = 0;
1175 server.watchdog_period = 0;
1176 }
1177
1178 /* This function will try to raise the max number of open files accordingly to
1179 * the configured max number of clients. It will also account for 32 additional
1180 * file descriptors as we need a few more for persistence, listening
1181 * sockets, log files and so forth.
1182 *
1183 * If it will not be possible to set the limit accordingly to the configured
1184 * max number of clients, the function will do the reverse setting
1185 * server.maxclients to the value that we can actually handle. */
1186 void adjustOpenFilesLimit(void) {
1187 rlim_t maxfiles = server.maxclients+32;
1188 struct rlimit limit;
1189
1190 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
1191 redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
1192 strerror(errno));
1193 server.maxclients = 1024-32;
1194 } else {
1195 rlim_t oldlimit = limit.rlim_cur;
1196
1197 /* Set the max number of files if the current limit is not enough
1198 * for our needs. */
1199 if (oldlimit < maxfiles) {
1200 rlim_t f;
1201
1202 f = maxfiles;
1203 while(f > oldlimit) {
1204 limit.rlim_cur = f;
1205 limit.rlim_max = f;
1206 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
1207 f -= 128;
1208 }
1209 if (f < oldlimit) f = oldlimit;
1210 if (f != maxfiles) {
1211 server.maxclients = f-32;
1212 redisLog(REDIS_WARNING,"Unable to set the max number of files limit to %d (%s), setting the max clients configuration to %d.",
1213 (int) maxfiles, strerror(errno), (int) server.maxclients);
1214 } else {
1215 redisLog(REDIS_NOTICE,"Max number of open files set to %d",
1216 (int) maxfiles);
1217 }
1218 }
1219 }
1220 }
1221
1222 void initServer() {
1223 int j;
1224
1225 signal(SIGHUP, SIG_IGN);
1226 signal(SIGPIPE, SIG_IGN);
1227 setupSignalHandlers();
1228
1229 if (server.syslog_enabled) {
1230 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
1231 server.syslog_facility);
1232 }
1233
1234 server.current_client = NULL;
1235 server.clients = listCreate();
1236 server.clients_to_close = listCreate();
1237 server.slaves = listCreate();
1238 server.monitors = listCreate();
1239 server.unblocked_clients = listCreate();
1240
1241 createSharedObjects();
1242 adjustOpenFilesLimit();
1243 server.el = aeCreateEventLoop(server.maxclients+1024);
1244 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1245
1246 if (server.port != 0) {
1247 server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr);
1248 if (server.ipfd == ANET_ERR) {
1249 redisLog(REDIS_WARNING, "Opening port %d: %s",
1250 server.port, server.neterr);
1251 exit(1);
1252 }
1253 }
1254 if (server.unixsocket != NULL) {
1255 unlink(server.unixsocket); /* don't care if this fails */
1256 server.sofd = anetUnixServer(server.neterr,server.unixsocket,server.unixsocketperm);
1257 if (server.sofd == ANET_ERR) {
1258 redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
1259 exit(1);
1260 }
1261 }
1262 if (server.ipfd < 0 && server.sofd < 0) {
1263 redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
1264 exit(1);
1265 }
1266 for (j = 0; j < server.dbnum; j++) {
1267 server.db[j].dict = dictCreate(&dbDictType,NULL);
1268 server.db[j].expires = dictCreate(&keyptrDictType,NULL);
1269 server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1270 server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
1271 server.db[j].id = j;
1272 }
1273 server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1274 server.pubsub_patterns = listCreate();
1275 listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1276 listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
1277 server.cronloops = 0;
1278 server.rdb_child_pid = -1;
1279 server.aof_child_pid = -1;
1280 server.aof_rewrite_buf = sdsempty();
1281 server.aof_buf = sdsempty();
1282 server.lastsave = time(NULL);
1283 server.dirty = 0;
1284 server.stat_numcommands = 0;
1285 server.stat_numconnections = 0;
1286 server.stat_expiredkeys = 0;
1287 server.stat_evictedkeys = 0;
1288 server.stat_starttime = time(NULL);
1289 server.stat_keyspace_misses = 0;
1290 server.stat_keyspace_hits = 0;
1291 server.stat_peak_memory = 0;
1292 server.stat_fork_time = 0;
1293 server.stat_rejected_conn = 0;
1294 memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
1295 server.ops_sec_idx = 0;
1296 server.ops_sec_last_sample_time = mstime();
1297 server.ops_sec_last_sample_ops = 0;
1298 server.unixtime = time(NULL);
1299 server.lastbgsave_status = REDIS_OK;
1300 server.stop_writes_on_bgsave_err = 1;
1301 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1302 if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE,
1303 acceptTcpHandler,NULL) == AE_ERR) oom("creating file event");
1304 if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
1305 acceptUnixHandler,NULL) == AE_ERR) oom("creating file event");
1306
1307 if (server.aof_state == REDIS_AOF_ON) {
1308 server.aof_fd = open(server.aof_filename,
1309 O_WRONLY|O_APPEND|O_CREAT,0644);
1310 if (server.aof_fd == -1) {
1311 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1312 strerror(errno));
1313 exit(1);
1314 }
1315 }
1316
1317 /* 32 bit instances are limited to 4GB of address space, so if there is
1318 * no explicit limit in the user provided configuration we set a limit
1319 * at 3.5GB using maxmemory with 'noeviction' policy'. This saves
1320 * useless crashes of the Redis instance. */
1321 if (server.arch_bits == 32 && server.maxmemory == 0) {
1322 redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3.5 GB maxmemory limit with 'noeviction' policy now.");
1323 server.maxmemory = 3584LL*(1024*1024); /* 3584 MB = 3.5 GB */
1324 server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
1325 }
1326
1327 scriptingInit();
1328 slowlogInit();
1329 bioInit();
1330 }
1331
1332 /* Populates the Redis Command Table starting from the hard coded list
1333 * we have on top of redis.c file. */
1334 void populateCommandTable(void) {
1335 int j;
1336 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1337
1338 for (j = 0; j < numcommands; j++) {
1339 struct redisCommand *c = redisCommandTable+j;
1340 char *f = c->sflags;
1341 int retval;
1342
1343 while(*f != '\0') {
1344 switch(*f) {
1345 case 'w': c->flags |= REDIS_CMD_WRITE; break;
1346 case 'r': c->flags |= REDIS_CMD_READONLY; break;
1347 case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
1348 case 'a': c->flags |= REDIS_CMD_ADMIN; break;
1349 case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
1350 case 'f': c->flags |= REDIS_CMD_FORCE_REPLICATION; break;
1351 case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
1352 case 'R': c->flags |= REDIS_CMD_RANDOM; break;
1353 case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
1354 default: redisPanic("Unsupported command flag"); break;
1355 }
1356 f++;
1357 }
1358
1359 retval = dictAdd(server.commands, sdsnew(c->name), c);
1360 assert(retval == DICT_OK);
1361 }
1362 }
1363
1364 void resetCommandTableStats(void) {
1365 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1366 int j;
1367
1368 for (j = 0; j < numcommands; j++) {
1369 struct redisCommand *c = redisCommandTable+j;
1370
1371 c->microseconds = 0;
1372 c->calls = 0;
1373 }
1374 }
1375
1376 /* ========================== Redis OP Array API ============================ */
1377
1378 void redisOpArrayInit(redisOpArray *oa) {
1379 oa->ops = NULL;
1380 oa->numops = 0;
1381 }
1382
1383 int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid,
1384 robj **argv, int argc, int target)
1385 {
1386 redisOp *op;
1387
1388 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1));
1389 op = oa->ops+oa->numops;
1390 op->cmd = cmd;
1391 op->dbid = dbid;
1392 op->argv = argv;
1393 op->argc = argc;
1394 op->target = target;
1395 oa->numops++;
1396 return oa->numops;
1397 }
1398
1399 void redisOpArrayFree(redisOpArray *oa) {
1400 while(oa->numops) {
1401 int j;
1402 redisOp *op;
1403
1404 oa->numops--;
1405 op = oa->ops+oa->numops;
1406 for (j = 0; j < op->argc; j++)
1407 decrRefCount(op->argv[j]);
1408 zfree(op->argv);
1409 }
1410 zfree(oa->ops);
1411 }
1412
1413 /* ====================== Commands lookup and execution ===================== */
1414
1415 struct redisCommand *lookupCommand(sds name) {
1416 return dictFetchValue(server.commands, name);
1417 }
1418
1419 struct redisCommand *lookupCommandByCString(char *s) {
1420 struct redisCommand *cmd;
1421 sds name = sdsnew(s);
1422
1423 cmd = dictFetchValue(server.commands, name);
1424 sdsfree(name);
1425 return cmd;
1426 }
1427
1428 /* Propagate the specified command (in the context of the specified database id)
1429 * to AOF, Slaves and Monitors.
1430 *
1431 * flags are an xor between:
1432 * + REDIS_PROPAGATE_NONE (no propagation of command at all)
1433 * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
1434 * + REDIS_PROPAGATE_REPL (propagate into the replication link)
1435 */
1436 void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1437 int flags)
1438 {
1439 if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
1440 feedAppendOnlyFile(cmd,dbid,argv,argc);
1441 if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves))
1442 replicationFeedSlaves(server.slaves,dbid,argv,argc);
1443 }
1444
1445 /* Used inside commands to schedule the propagation of additional commands
1446 * after the current command is propagated to AOF / Replication. */
1447 void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1448 int target)
1449 {
1450 redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
1451 }
1452
1453 /* Call() is the core of Redis execution of a command */
1454 void call(redisClient *c, int flags) {
1455 long long dirty, start = ustime(), duration;
1456
1457 /* Sent the command to clients in MONITOR mode, only if the commands are
1458 * not geneated from reading an AOF. */
1459 if (listLength(server.monitors) && !server.loading)
1460 replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
1461
1462 /* Call the command. */
1463 redisOpArrayInit(&server.also_propagate);
1464 dirty = server.dirty;
1465 c->cmd->proc(c);
1466 dirty = server.dirty-dirty;
1467 duration = ustime()-start;
1468
1469 /* When EVAL is called loading the AOF we don't want commands called
1470 * from Lua to go into the slowlog or to populate statistics. */
1471 if (server.loading && c->flags & REDIS_LUA_CLIENT)
1472 flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
1473
1474 /* Log the command into the Slow log if needed, and populate the
1475 * per-command statistics that we show in INFO commandstats. */
1476 if (flags & REDIS_CALL_SLOWLOG)
1477 slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
1478 if (flags & REDIS_CALL_STATS) {
1479 c->cmd->microseconds += duration;
1480 c->cmd->calls++;
1481 }
1482
1483 /* Propagate the command into the AOF and replication link */
1484 if (flags & REDIS_CALL_PROPAGATE) {
1485 int flags = REDIS_PROPAGATE_NONE;
1486
1487 if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION)
1488 flags |= REDIS_PROPAGATE_REPL;
1489 if (dirty)
1490 flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
1491 if (flags != REDIS_PROPAGATE_NONE)
1492 propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
1493 }
1494 /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional
1495 * PUSH command. */
1496 if (server.also_propagate.numops) {
1497 int j;
1498 redisOp *rop;
1499
1500 for (j = 0; j < server.also_propagate.numops; j++) {
1501 rop = &server.also_propagate.ops[j];
1502 propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
1503 }
1504 redisOpArrayFree(&server.also_propagate);
1505 }
1506 server.stat_numcommands++;
1507 }
1508
1509 /* If this function gets called we already read a whole
1510 * command, argments are in the client argv/argc fields.
1511 * processCommand() execute the command or prepare the
1512 * server for a bulk read from the client.
1513 *
1514 * If 1 is returned the client is still alive and valid and
1515 * and other operations can be performed by the caller. Otherwise
1516 * if 0 is returned the client was destroied (i.e. after QUIT). */
1517 int processCommand(redisClient *c) {
1518 /* The QUIT command is handled separately. Normal command procs will
1519 * go through checking for replication and QUIT will cause trouble
1520 * when FORCE_REPLICATION is enabled and would be implemented in
1521 * a regular command proc. */
1522 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1523 addReply(c,shared.ok);
1524 c->flags |= REDIS_CLOSE_AFTER_REPLY;
1525 return REDIS_ERR;
1526 }
1527
1528 /* Now lookup the command and check ASAP about trivial error conditions
1529 * such as wrong arity, bad command name and so forth. */
1530 c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
1531 if (!c->cmd) {
1532 addReplyErrorFormat(c,"unknown command '%s'",
1533 (char*)c->argv[0]->ptr);
1534 return REDIS_OK;
1535 } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
1536 (c->argc < -c->cmd->arity)) {
1537 addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
1538 c->cmd->name);
1539 return REDIS_OK;
1540 }
1541
1542 /* Check if the user is authenticated */
1543 if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
1544 {
1545 addReplyError(c,"operation not permitted");
1546 return REDIS_OK;
1547 }
1548
1549 /* Handle the maxmemory directive.
1550 *
1551 * First we try to free some memory if possible (if there are volatile
1552 * keys in the dataset). If there are not the only thing we can do
1553 * is returning an error. */
1554 if (server.maxmemory) {
1555 int retval = freeMemoryIfNeeded();
1556 if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
1557 addReply(c, shared.oomerr);
1558 return REDIS_OK;
1559 }
1560 }
1561
1562 /* Don't accept write commands if there are problems persisting on disk. */
1563 if (server.stop_writes_on_bgsave_err &&
1564 server.saveparamslen > 0
1565 && server.lastbgsave_status == REDIS_ERR &&
1566 c->cmd->flags & REDIS_CMD_WRITE)
1567 {
1568 addReply(c, shared.bgsaveerr);
1569 return REDIS_OK;
1570 }
1571
1572 /* Don't accept wirte commands if this is a read only slave. But
1573 * accept write commands if this is our master. */
1574 if (server.masterhost && server.repl_slave_ro &&
1575 !(c->flags & REDIS_MASTER) &&
1576 c->cmd->flags & REDIS_CMD_WRITE)
1577 {
1578 addReply(c, shared.roslaveerr);
1579 return REDIS_OK;
1580 }
1581
1582 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
1583 if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
1584 &&
1585 c->cmd->proc != subscribeCommand &&
1586 c->cmd->proc != unsubscribeCommand &&
1587 c->cmd->proc != psubscribeCommand &&
1588 c->cmd->proc != punsubscribeCommand) {
1589 addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
1590 return REDIS_OK;
1591 }
1592
1593 /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
1594 * we are a slave with a broken link with master. */
1595 if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
1596 server.repl_serve_stale_data == 0 &&
1597 c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
1598 {
1599 addReply(c, shared.masterdownerr);
1600 return REDIS_OK;
1601 }
1602
1603 /* Loading DB? Return an error if the command is not INFO */
1604 if (server.loading && c->cmd->proc != infoCommand) {
1605 addReply(c, shared.loadingerr);
1606 return REDIS_OK;
1607 }
1608
1609 /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. */
1610 if (server.lua_timedout &&
1611 !(c->cmd->proc == shutdownCommand &&
1612 c->argc == 2 &&
1613 tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
1614 !(c->cmd->proc == scriptCommand &&
1615 c->argc == 2 &&
1616 tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
1617 {
1618 addReply(c, shared.slowscripterr);
1619 return REDIS_OK;
1620 }
1621
1622 /* Exec the command */
1623 if (c->flags & REDIS_MULTI &&
1624 c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
1625 c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
1626 {
1627 queueMultiCommand(c);
1628 addReply(c,shared.queued);
1629 } else {
1630 call(c,REDIS_CALL_FULL);
1631 }
1632 return REDIS_OK;
1633 }
1634
1635 /*================================== Shutdown =============================== */
1636
1637 int prepareForShutdown(int flags) {
1638 int save = flags & REDIS_SHUTDOWN_SAVE;
1639 int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
1640
1641 redisLog(REDIS_WARNING,"User requested shutdown...");
1642 /* Kill the saving child if there is a background saving in progress.
1643 We want to avoid race conditions, for instance our saving child may
1644 overwrite the synchronous saving did by SHUTDOWN. */
1645 if (server.rdb_child_pid != -1) {
1646 redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
1647 kill(server.rdb_child_pid,SIGKILL);
1648 rdbRemoveTempFile(server.rdb_child_pid);
1649 }
1650 if (server.aof_state != REDIS_AOF_OFF) {
1651 /* Kill the AOF saving child as the AOF we already have may be longer
1652 * but contains the full dataset anyway. */
1653 if (server.aof_child_pid != -1) {
1654 redisLog(REDIS_WARNING,
1655 "There is a child rewriting the AOF. Killing it!");
1656 kill(server.aof_child_pid,SIGKILL);
1657 }
1658 /* Append only file: fsync() the AOF and exit */
1659 redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
1660 aof_fsync(server.aof_fd);
1661 }
1662 if ((server.saveparamslen > 0 && !nosave) || save) {
1663 redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
1664 /* Snapshotting. Perform a SYNC SAVE and exit */
1665 if (rdbSave(server.rdb_filename) != REDIS_OK) {
1666 /* Ooops.. error saving! The best we can do is to continue
1667 * operating. Note that if there was a background saving process,
1668 * in the next cron() Redis will be notified that the background
1669 * saving aborted, handling special stuff like slaves pending for
1670 * synchronization... */
1671 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
1672 return REDIS_ERR;
1673 }
1674 }
1675 if (server.daemonize) {
1676 redisLog(REDIS_NOTICE,"Removing the pid file.");
1677 unlink(server.pidfile);
1678 }
1679 /* Close the listening sockets. Apparently this allows faster restarts. */
1680 if (server.ipfd != -1) close(server.ipfd);
1681 if (server.sofd != -1) close(server.sofd);
1682 if (server.unixsocket) {
1683 redisLog(REDIS_NOTICE,"Removing the unix socket file.");
1684 unlink(server.unixsocket); /* don't care if this fails */
1685 }
1686
1687 redisLog(REDIS_WARNING,"Redis is now ready to exit, bye bye...");
1688 return REDIS_OK;
1689 }
1690
1691 /*================================== Commands =============================== */
1692
1693 void authCommand(redisClient *c) {
1694 if (!server.requirepass) {
1695 addReplyError(c,"Client sent AUTH, but no password is set");
1696 } else if (!strcmp(c->argv[1]->ptr, server.requirepass)) {
1697 c->authenticated = 1;
1698 addReply(c,shared.ok);
1699 } else {
1700 c->authenticated = 0;
1701 addReplyError(c,"invalid password");
1702 }
1703 }
1704
1705 void pingCommand(redisClient *c) {
1706 addReply(c,shared.pong);
1707 }
1708
1709 void echoCommand(redisClient *c) {
1710 addReplyBulk(c,c->argv[1]);
1711 }
1712
1713 void timeCommand(redisClient *c) {
1714 struct timeval tv;
1715
1716 /* gettimeofday() can only fail if &tv is a bad addresss so we
1717 * don't check for errors. */
1718 gettimeofday(&tv,NULL);
1719 addReplyMultiBulkLen(c,2);
1720 addReplyBulkLongLong(c,tv.tv_sec);
1721 addReplyBulkLongLong(c,tv.tv_usec);
1722 }
1723
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result (the longest
 * output is a handful of characters; callers in this file pass 64 bytes).
 * 'n' is the number of bytes to format.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger values
 * are scaled by the biggest power of 1024 that keeps the integer part below
 * 1024 and printed with two decimals followed by K, M, G, T, P or E. Every
 * possible value of 'n' produces output, so 's' is always written. */
void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTPE";
    const unsigned int last = sizeof(units)-2; /* index of 'E' */
    unsigned int idx = 0;
    unsigned int shift = 10;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* Move to the next unit while the current one would still show 1024
     * or more. 'E' (2^60) is the last unit an unsigned 64 bit value needs,
     * so the shift never exceeds 60. */
    while (idx < last && (n >> shift) >= 1024) {
        shift += 10;
        idx++;
    }
    sprintf(s,"%.2f%c",(double)n/(double)(1ULL << shift),units[idx]);
}
1744
1745 /* Create the string returned by the INFO command. This is decoupled
1746 * by the INFO command itself as we need to report the same information
1747 * on memory corruption problems. */
1748 sds genRedisInfoString(char *section) {
1749 sds info = sdsempty();
1750 time_t uptime = server.unixtime-server.stat_starttime;
1751 int j, numcommands;
1752 struct rusage self_ru, c_ru;
1753 unsigned long lol, bib;
1754 int allsections = 0, defsections = 0;
1755 int sections = 0;
1756
1757 if (section) {
1758 allsections = strcasecmp(section,"all") == 0;
1759 defsections = strcasecmp(section,"default") == 0;
1760 }
1761
1762 getrusage(RUSAGE_SELF, &self_ru);
1763 getrusage(RUSAGE_CHILDREN, &c_ru);
1764 getClientsMaxBuffers(&lol,&bib);
1765
1766 /* Server */
1767 if (allsections || defsections || !strcasecmp(section,"server")) {
1768 struct utsname name;
1769
1770 if (sections++) info = sdscat(info,"\r\n");
1771 uname(&name);
1772 info = sdscatprintf(info,
1773 "# Server\r\n"
1774 "redis_version:%s\r\n"
1775 "redis_git_sha1:%s\r\n"
1776 "redis_git_dirty:%d\r\n"
1777 "os:%s %s %s\r\n"
1778 "arch_bits:%d\r\n"
1779 "multiplexing_api:%s\r\n"
1780 "gcc_version:%d.%d.%d\r\n"
1781 "process_id:%ld\r\n"
1782 "run_id:%s\r\n"
1783 "tcp_port:%d\r\n"
1784 "uptime_in_seconds:%ld\r\n"
1785 "uptime_in_days:%ld\r\n"
1786 "lru_clock:%ld\r\n",
1787 REDIS_VERSION,
1788 redisGitSHA1(),
1789 strtol(redisGitDirty(),NULL,10) > 0,
1790 name.sysname, name.release, name.machine,
1791 server.arch_bits,
1792 aeGetApiName(),
1793 #ifdef __GNUC__
1794 __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
1795 #else
1796 0,0,0,
1797 #endif
1798 (long) getpid(),
1799 server.runid,
1800 server.port,
1801 uptime,
1802 uptime/(3600*24),
1803 (unsigned long) server.lruclock);
1804 }
1805
1806 /* Clients */
1807 if (allsections || defsections || !strcasecmp(section,"clients")) {
1808 if (sections++) info = sdscat(info,"\r\n");
1809 info = sdscatprintf(info,
1810 "# Clients\r\n"
1811 "connected_clients:%lu\r\n"
1812 "client_longest_output_list:%lu\r\n"
1813 "client_biggest_input_buf:%lu\r\n"
1814 "blocked_clients:%d\r\n",
1815 listLength(server.clients)-listLength(server.slaves),
1816 lol, bib,
1817 server.bpop_blocked_clients);
1818 }
1819
1820 /* Memory */
1821 if (allsections || defsections || !strcasecmp(section,"memory")) {
1822 char hmem[64];
1823 char peak_hmem[64];
1824
1825 bytesToHuman(hmem,zmalloc_used_memory());
1826 bytesToHuman(peak_hmem,server.stat_peak_memory);
1827 if (sections++) info = sdscat(info,"\r\n");
1828 info = sdscatprintf(info,
1829 "# Memory\r\n"
1830 "used_memory:%zu\r\n"
1831 "used_memory_human:%s\r\n"
1832 "used_memory_rss:%zu\r\n"
1833 "used_memory_peak:%zu\r\n"
1834 "used_memory_peak_human:%s\r\n"
1835 "used_memory_lua:%lld\r\n"
1836 "mem_fragmentation_ratio:%.2f\r\n"
1837 "mem_allocator:%s\r\n",
1838 zmalloc_used_memory(),
1839 hmem,
1840 zmalloc_get_rss(),
1841 server.stat_peak_memory,
1842 peak_hmem,
1843 ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
1844 zmalloc_get_fragmentation_ratio(),
1845 ZMALLOC_LIB
1846 );
1847 }
1848
1849 /* Persistence */
1850 if (allsections || defsections || !strcasecmp(section,"persistence")) {
1851 if (sections++) info = sdscat(info,"\r\n");
1852 info = sdscatprintf(info,
1853 "# Persistence\r\n"
1854 "loading:%d\r\n"
1855 "aof_enabled:%d\r\n"
1856 "changes_since_last_save:%lld\r\n"
1857 "bgsave_in_progress:%d\r\n"
1858 "last_save_time:%ld\r\n"
1859 "last_bgsave_status:%s\r\n"
1860 "bgrewriteaof_in_progress:%d\r\n"
1861 "bgrewriteaof_scheduled:%d\r\n",
1862 server.loading,
1863 server.aof_state != REDIS_AOF_OFF,
1864 server.dirty,
1865 server.rdb_child_pid != -1,
1866 server.lastsave,
1867 server.lastbgsave_status == REDIS_OK ? "ok" : "err",
1868 server.aof_child_pid != -1,
1869 server.aof_rewrite_scheduled);
1870
1871 if (server.aof_state != REDIS_AOF_OFF) {
1872 info = sdscatprintf(info,
1873 "aof_current_size:%lld\r\n"
1874 "aof_base_size:%lld\r\n"
1875 "aof_pending_rewrite:%d\r\n"
1876 "aof_buffer_length:%zu\r\n"
1877 "aof_pending_bio_fsync:%llu\r\n"
1878 "aof_delayed_fsync:%lu\r\n",
1879 (long long) server.aof_current_size,
1880 (long long) server.aof_rewrite_base_size,
1881 server.aof_rewrite_scheduled,
1882 sdslen(server.aof_buf),
1883 bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
1884 server.aof_delayed_fsync);
1885 }
1886
1887 if (server.loading) {
1888 double perc;
1889 time_t eta, elapsed;
1890 off_t remaining_bytes = server.loading_total_bytes-
1891 server.loading_loaded_bytes;
1892
1893 perc = ((double)server.loading_loaded_bytes /
1894 server.loading_total_bytes) * 100;
1895
1896 elapsed = server.unixtime-server.loading_start_time;
1897 if (elapsed == 0) {
1898 eta = 1; /* A fake 1 second figure if we don't have
1899 enough info */
1900 } else {
1901 eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
1902 }
1903
1904 info = sdscatprintf(info,
1905 "loading_start_time:%ld\r\n"
1906 "loading_total_bytes:%llu\r\n"
1907 "loading_loaded_bytes:%llu\r\n"
1908 "loading_loaded_perc:%.2f\r\n"
1909 "loading_eta_seconds:%ld\r\n"
1910 ,(unsigned long) server.loading_start_time,
1911 (unsigned long long) server.loading_total_bytes,
1912 (unsigned long long) server.loading_loaded_bytes,
1913 perc,
1914 eta
1915 );
1916 }
1917 }
1918
1919 /* Stats */
1920 if (allsections || defsections || !strcasecmp(section,"stats")) {
1921 if (sections++) info = sdscat(info,"\r\n");
1922 info = sdscatprintf(info,
1923 "# Stats\r\n"
1924 "total_connections_received:%lld\r\n"
1925 "total_commands_processed:%lld\r\n"
1926 "instantaneous_ops_per_sec:%lld\r\n"
1927 "rejected_connections:%lld\r\n"
1928 "expired_keys:%lld\r\n"
1929 "evicted_keys:%lld\r\n"
1930 "keyspace_hits:%lld\r\n"
1931 "keyspace_misses:%lld\r\n"
1932 "pubsub_channels:%ld\r\n"
1933 "pubsub_patterns:%lu\r\n"
1934 "latest_fork_usec:%lld\r\n",
1935 server.stat_numconnections,
1936 server.stat_numcommands,
1937 getOperationsPerSecond(),
1938 server.stat_rejected_conn,
1939 server.stat_expiredkeys,
1940 server.stat_evictedkeys,
1941 server.stat_keyspace_hits,
1942 server.stat_keyspace_misses,
1943 dictSize(server.pubsub_channels),
1944 listLength(server.pubsub_patterns),
1945 server.stat_fork_time);
1946 }
1947
1948 /* Replication */
1949 if (allsections || defsections || !strcasecmp(section,"replication")) {
1950 if (sections++) info = sdscat(info,"\r\n");
1951 info = sdscatprintf(info,
1952 "# Replication\r\n"
1953 "role:%s\r\n",
1954 server.masterhost == NULL ? "master" : "slave");
1955 if (server.masterhost) {
1956 info = sdscatprintf(info,
1957 "master_host:%s\r\n"
1958 "master_port:%d\r\n"
1959 "master_link_status:%s\r\n"
1960 "master_last_io_seconds_ago:%d\r\n"
1961 "master_sync_in_progress:%d\r\n"
1962 ,server.masterhost,
1963 server.masterport,
1964 (server.repl_state == REDIS_REPL_CONNECTED) ?
1965 "up" : "down",
1966 server.master ?
1967 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
1968 server.repl_state == REDIS_REPL_TRANSFER
1969 );
1970
1971 if (server.repl_state == REDIS_REPL_TRANSFER) {
1972 info = sdscatprintf(info,
1973 "master_sync_left_bytes:%ld\r\n"
1974 "master_sync_last_io_seconds_ago:%d\r\n"
1975 ,(long)server.repl_transfer_left,
1976 (int)(server.unixtime-server.repl_transfer_lastio)
1977 );
1978 }
1979
1980 if (server.repl_state != REDIS_REPL_CONNECTED) {
1981 info = sdscatprintf(info,
1982 "master_link_down_since_seconds:%ld\r\n",
1983 (long)server.unixtime-server.repl_down_since);
1984 }
1985 }
1986 info = sdscatprintf(info,
1987 "connected_slaves:%lu\r\n",
1988 listLength(server.slaves));
1989 if (listLength(server.slaves)) {
1990 int slaveid = 0;
1991 listNode *ln;
1992 listIter li;
1993
1994 listRewind(server.slaves,&li);
1995 while((ln = listNext(&li))) {
1996 redisClient *slave = listNodeValue(ln);
1997 char *state = NULL;
1998 char ip[32];
1999 int port;
2000
2001 if (anetPeerToString(slave->fd,ip,&port) == -1) continue;
2002 switch(slave->replstate) {
2003 case REDIS_REPL_WAIT_BGSAVE_START:
2004 case REDIS_REPL_WAIT_BGSAVE_END:
2005 state = "wait_bgsave";
2006 break;
2007 case REDIS_REPL_SEND_BULK:
2008 state = "send_bulk";
2009 break;
2010 case REDIS_REPL_ONLINE:
2011 state = "online";
2012 break;
2013 }
2014 if (state == NULL) continue;
2015 info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n",
2016 slaveid,ip,port,state);
2017 slaveid++;
2018 }
2019 }
2020 }
2021
2022 /* CPU */
2023 if (allsections || defsections || !strcasecmp(section,"cpu")) {
2024 if (sections++) info = sdscat(info,"\r\n");
2025 info = sdscatprintf(info,
2026 "# CPU\r\n"
2027 "used_cpu_sys:%.2f\r\n"
2028 "used_cpu_user:%.2f\r\n"
2029 "used_cpu_sys_children:%.2f\r\n"
2030 "used_cpu_user_children:%.2f\r\n",
2031 (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
2032 (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
2033 (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
2034 (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
2035 }
2036
2037 /* cmdtime */
2038 if (allsections || !strcasecmp(section,"commandstats")) {
2039 if (sections++) info = sdscat(info,"\r\n");
2040 info = sdscatprintf(info, "# Commandstats\r\n");
2041 numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
2042 for (j = 0; j < numcommands; j++) {
2043 struct redisCommand *c = redisCommandTable+j;
2044
2045 if (!c->calls) continue;
2046 info = sdscatprintf(info,
2047 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
2048 c->name, c->calls, c->microseconds,
2049 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
2050 }
2051 }
2052
2053 /* Key space */
2054 if (allsections || defsections || !strcasecmp(section,"keyspace")) {
2055 if (sections++) info = sdscat(info,"\r\n");
2056 info = sdscatprintf(info, "# Keyspace\r\n");
2057 for (j = 0; j < server.dbnum; j++) {
2058 long long keys, vkeys;
2059
2060 keys = dictSize(server.db[j].dict);
2061 vkeys = dictSize(server.db[j].expires);
2062 if (keys || vkeys) {
2063 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
2064 j, keys, vkeys);
2065 }
2066 }
2067 }
2068 return info;
2069 }
2070
2071 void infoCommand(redisClient *c) {
2072 char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
2073
2074 if (c->argc > 2) {
2075 addReply(c,shared.syntaxerr);
2076 return;
2077 }
2078 sds info = genRedisInfoString(section);
2079 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
2080 (unsigned long)sdslen(info)));
2081 addReplySds(c,info);
2082 addReply(c,shared.crlf);
2083 }
2084
2085 void monitorCommand(redisClient *c) {
2086 /* ignore MONITOR if aleady slave or in monitor mode */
2087 if (c->flags & REDIS_SLAVE) return;
2088
2089 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
2090 c->slaveseldb = 0;
2091 listAddNodeTail(server.monitors,c);
2092 addReply(c,shared.ok);
2093 }
2094
2095 /* ============================ Maxmemory directive ======================== */
2096
2097 /* This function gets called when 'maxmemory' is set on the config file to limit
2098 * the max memory used by the server, before processing a command.
2099 *
2100 * The goal of the function is to free enough memory to keep Redis under the
2101 * configured memory limit.
2102 *
2103 * The function starts calculating how many bytes should be freed to keep
2104 * Redis under the limit, and enters a loop selecting the best keys to
2105 * evict accordingly to the configured policy.
2106 *
2107 * If all the bytes needed to return back under the limit were freed the
2108 * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
2109 * should block the execution of commands that will result in more memory
2110 * used by the server.
2111 */
2112 int freeMemoryIfNeeded(void) {
2113 size_t mem_used, mem_tofree, mem_freed;
2114 int slaves = listLength(server.slaves);
2115
2116 /* Remove the size of slaves output buffers and AOF buffer from the
2117 * count of used memory. */
2118 mem_used = zmalloc_used_memory();
2119 if (slaves) {
2120 listIter li;
2121 listNode *ln;
2122
2123 listRewind(server.slaves,&li);
2124 while((ln = listNext(&li))) {
2125 redisClient *slave = listNodeValue(ln);
2126 unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
2127 if (obuf_bytes > mem_used)
2128 mem_used = 0;
2129 else
2130 mem_used -= obuf_bytes;
2131 }
2132 }
2133 if (server.aof_state != REDIS_AOF_OFF) {
2134 mem_used -= sdslen(server.aof_buf);
2135 mem_used -= sdslen(server.aof_rewrite_buf);
2136 }
2137
2138 /* Check if we are over the memory limit. */
2139 if (mem_used <= server.maxmemory) return REDIS_OK;
2140
2141 if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
2142 return REDIS_ERR; /* We need to free memory, but policy forbids. */
2143
2144 /* Compute how much memory we need to free. */
2145 mem_tofree = mem_used - server.maxmemory;
2146 mem_freed = 0;
2147 while (mem_freed < mem_tofree) {
2148 int j, k, keys_freed = 0;
2149
2150 for (j = 0; j < server.dbnum; j++) {
2151 long bestval = 0; /* just to prevent warning */
2152 sds bestkey = NULL;
2153 struct dictEntry *de;
2154 redisDb *db = server.db+j;
2155 dict *dict;
2156
2157 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2158 server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
2159 {
2160 dict = server.db[j].dict;
2161 } else {
2162 dict = server.db[j].expires;
2163 }
2164 if (dictSize(dict) == 0) continue;
2165
2166 /* volatile-random and allkeys-random policy */
2167 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
2168 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
2169 {
2170 de = dictGetRandomKey(dict);
2171 bestkey = dictGetKey(de);
2172 }
2173
2174 /* volatile-lru and allkeys-lru policy */
2175 else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2176 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2177 {
2178 for (k = 0; k < server.maxmemory_samples; k++) {
2179 sds thiskey;
2180 long thisval;
2181 robj *o;
2182
2183 de = dictGetRandomKey(dict);
2184 thiskey = dictGetKey(de);
2185 /* When policy is volatile-lru we need an additonal lookup
2186 * to locate the real key, as dict is set to db->expires. */
2187 if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2188 de = dictFind(db->dict, thiskey);
2189 o = dictGetVal(de);
2190 thisval = estimateObjectIdleTime(o);
2191
2192 /* Higher idle time is better candidate for deletion */
2193 if (bestkey == NULL || thisval > bestval) {
2194 bestkey = thiskey;
2195 bestval = thisval;
2196 }
2197 }
2198 }
2199
2200 /* volatile-ttl */
2201 else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
2202 for (k = 0; k < server.maxmemory_samples; k++) {
2203 sds thiskey;
2204 long thisval;
2205
2206 de = dictGetRandomKey(dict);
2207 thiskey = dictGetKey(de);
2208 thisval = (long) dictGetVal(de);
2209
2210 /* Expire sooner (minor expire unix timestamp) is better
2211 * candidate for deletion */
2212 if (bestkey == NULL || thisval < bestval) {
2213 bestkey = thiskey;
2214 bestval = thisval;
2215 }
2216 }
2217 }
2218
2219 /* Finally remove the selected key. */
2220 if (bestkey) {
2221 long long delta;
2222
2223 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
2224 propagateExpire(db,keyobj);
2225 /* We compute the amount of memory freed by dbDelete() alone.
2226 * It is possible that actually the memory needed to propagate
2227 * the DEL in AOF and replication link is greater than the one
2228 * we are freeing removing the key, but we can't account for
2229 * that otherwise we would never exit the loop.
2230 *
2231 * AOF and Output buffer memory will be freed eventually so
2232 * we only care about memory used by the key space. */
2233 delta = (long long) zmalloc_used_memory();
2234 dbDelete(db,keyobj);
2235 delta -= (long long) zmalloc_used_memory();
2236 mem_freed += delta;
2237 server.stat_evictedkeys++;
2238 decrRefCount(keyobj);
2239 keys_freed++;
2240
2241 /* When the memory to free starts to be big enough, we may
2242 * start spending so much time here that is impossible to
2243 * deliver data to the slaves fast enough, so we force the
2244 * transmission here inside the loop. */
2245 if (slaves) flushSlavesOutputBuffers();
2246 }
2247 }
2248 if (!keys_freed) return REDIS_ERR; /* nothing to free... */
2249 }
2250 return REDIS_OK;
2251 }
2252
2253 /* =================================== Main! ================================ */
2254
2255 #ifdef __linux__
/* Return the integer stored in /proc/sys/vm/overcommit_memory (parsed with
 * atoi()), or -1 when the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    char *got;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    got = fgets(line,64,fp);
    fclose(fp);

    return (got == NULL) ? -1 : atoi(line);
}
2269
2270 void linuxOvercommitMemoryWarning(void) {
2271 if (linuxOvercommitMemoryValue() == 0) {
2272 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
2273 }
2274 }
2275 #endif /* __linux__ */
2276
2277 void createPidFile(void) {
2278 /* Try to write the pid file in a best-effort way. */
2279 FILE *fp = fopen(server.pidfile,"w");
2280 if (fp) {
2281 fprintf(fp,"%d\n",(int)getpid());
2282 fclose(fp);
2283 }
2284 }
2285
/* Detach from the caller: fork (the process for which fork() returns
 * non-zero exits with status 0), start a new session in the child, and
 * redirect the standard streams to /dev/null. */
void daemonize(void) {
    int devnull;

    if (fork() != 0) exit(0); /* parent exits */
    setsid(); /* create a new session */

    /* Every output goes to /dev/null. If Redis is daemonized but
     * the 'logfile' is set to 'stdout' in the configuration file
     * it will not log at all. */
    devnull = open("/dev/null", O_RDWR, 0);
    if (devnull == -1) return;

    dup2(devnull, STDIN_FILENO);
    dup2(devnull, STDOUT_FILENO);
    dup2(devnull, STDERR_FILENO);
    /* Close the extra descriptor unless it is one of the standard ones. */
    if (devnull > STDERR_FILENO) close(devnull);
}
2302
2303 void version() {
2304 printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d\n",
2305 REDIS_VERSION,
2306 redisGitSHA1(),
2307 atoi(redisGitDirty()) > 0,
2308 ZMALLOC_LIB,
2309 sizeof(long) == 4 ? 32 : 64);
2310 exit(0);
2311 }
2312
/* Print the command line help to standard error and exit with status 1. */
void usage() {
    static const char *const help[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n"
    };
    size_t i;

    /* None of the lines contains a conversion specification, so writing
     * them verbatim produces the same bytes as formatting them. */
    for (i = 0; i < sizeof(help)/sizeof(help[0]); i++)
        fputs(help[i],stderr);
    exit(1);
}
2327
2328 void redisAsciiArt(void) {
2329 #include "asciilogo.h"
2330 char *buf = zmalloc(1024*16);
2331
2332 snprintf(buf,1024*16,ascii_logo,
2333 REDIS_VERSION,
2334 redisGitSHA1(),
2335 strtol(redisGitDirty(),NULL,10) > 0,
2336 (sizeof(long) == 8) ? "64" : "32",
2337 "stand alone",
2338 server.port,
2339 (long) getpid()
2340 );
2341 redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
2342 zfree(buf);
2343 }
2344
2345 static void sigtermHandler(int sig) {
2346 REDIS_NOTUSED(sig);
2347
2348 redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown...");
2349 server.shutdown_asap = 1;
2350 }
2351
2352 void setupSignalHandlers(void) {
2353 struct sigaction act;
2354
2355 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
2356 * Otherwise, sa_handler is used. */
2357 sigemptyset(&act.sa_mask);
2358 act.sa_flags = 0;
2359 act.sa_handler = sigtermHandler;
2360 sigaction(SIGTERM, &act, NULL);
2361
2362 #ifdef HAVE_BACKTRACE
2363 sigemptyset(&act.sa_mask);
2364 act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
2365 act.sa_sigaction = sigsegvHandler;
2366 sigaction(SIGSEGV, &act, NULL);
2367 sigaction(SIGBUS, &act, NULL);
2368 sigaction(SIGFPE, &act, NULL);
2369 sigaction(SIGILL, &act, NULL);
2370 #endif
2371 return;
2372 }
2373
2374 void memtest(size_t megabytes, int passes);
2375
2376 int main(int argc, char **argv) {
2377 long long start;
2378 struct timeval tv;
2379
2380 /* We need to initialize our libraries, and the server configuration. */
2381 zmalloc_enable_thread_safeness();
2382 srand(time(NULL)^getpid());
2383 gettimeofday(&tv,NULL);
2384 dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
2385 initServerConfig();
2386
2387 if (argc >= 2) {
2388 int j = 1; /* First option to parse in argv[] */
2389 sds options = sdsempty();
2390 char *configfile = NULL;
2391
2392 /* Handle special options --help and --version */
2393 if (strcmp(argv[1], "-v") == 0 ||
2394 strcmp(argv[1], "--version") == 0) version();
2395 if (strcmp(argv[1], "--help") == 0 ||
2396 strcmp(argv[1], "-h") == 0) usage();
2397 if (strcmp(argv[1], "--test-memory") == 0) {
2398 if (argc == 3) {
2399 memtest(atoi(argv[2]),50);
2400 exit(0);
2401 } else {
2402 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
2403 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
2404 exit(1);
2405 }
2406 }
2407
2408 /* First argument is the config file name? */
2409 if (argv[j][0] != '-' || argv[j][1] != '-')
2410 configfile = argv[j++];
2411 /* All the other options are parsed and conceptually appended to the
2412 * configuration file. For instance --port 6380 will generate the
2413 * string "port 6380\n" to be parsed after the actual file name
2414 * is parsed, if any. */
2415 while(j != argc) {
2416 if (argv[j][0] == '-' && argv[j][1] == '-') {
2417 /* Option name */
2418 if (sdslen(options)) options = sdscat(options,"\n");
2419 options = sdscat(options,argv[j]+2);
2420 options = sdscat(options," ");
2421 } else {
2422 /* Option argument */
2423 options = sdscatrepr(options,argv[j],strlen(argv[j]));
2424 options = sdscat(options," ");
2425 }
2426 j++;
2427 }
2428 resetServerSaveParams();
2429 loadServerConfig(configfile,options);
2430 sdsfree(options);
2431 } else {
2432 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
2433 }
2434 if (server.daemonize) daemonize();
2435 initServer();
2436 if (server.daemonize) createPidFile();
2437 redisAsciiArt();
2438 redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
2439 #ifdef __linux__
2440 linuxOvercommitMemoryWarning();
2441 #endif
2442 start = ustime();
2443 if (server.aof_state == REDIS_AOF_ON) {
2444 if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
2445 redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
2446 } else {
2447 if (rdbLoad(server.rdb_filename) == REDIS_OK) {
2448 redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
2449 (float)(ustime()-start)/1000000);
2450 } else if (errno != ENOENT) {
2451 redisLog(REDIS_WARNING,"Fatal error loading the DB. Exiting.");
2452 exit(1);
2453 }
2454 }
2455 if (server.ipfd > 0)
2456 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
2457 if (server.sofd > 0)
2458 redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
2459 aeSetBeforeSleepProc(server.el,beforeSleep);
2460 aeMain(server.el);
2461 aeDeleteEventLoop(server.el);
2462 return 0;
2463 }
2464
2465 /* The End */