]> git.saurik.com Git - redis.git/blob - src/redis.c
63afefad9223ac11c8a395c9e2bbf843c0d17715
[redis.git] / src / redis.c
1 /*
2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "redis.h"
31 #include "slowlog.h"
32 #include "bio.h"
33
34 #include <time.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <ctype.h>
40 #include <stdarg.h>
41 #include <arpa/inet.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <sys/time.h>
45 #include <sys/resource.h>
46 #include <sys/uio.h>
47 #include <limits.h>
48 #include <float.h>
49 #include <math.h>
50 #include <sys/resource.h>
51 #include <sys/utsname.h>
52
53 /* Our shared "common" objects */
54
55 struct sharedObjectsStruct shared;
56
57 /* Global vars that are actually used as constants. The following double
58 * values are used for double on-disk serialization, and are initialized
59 * at runtime to avoid strange compiler optimizations. */
60
61 double R_Zero, R_PosInf, R_NegInf, R_Nan;
62
63 /*================================= Globals ================================= */
64
65 /* Global vars */
66 struct redisServer server; /* server global state */
67 struct redisCommand *commandTable;
68
69 /* Our command table.
70 *
71 * Every entry is composed of the following fields:
72 *
73 * name: a string representing the command name.
74 * function: pointer to the C function implementing the command.
75 * arity: number of arguments, it is possible to use -N to say >= N
76 * sflags: command flags as string. See below for a table of flags.
77 * flags: flags as bitmask. Computed by Redis using the 'sflags' field.
78 * get_keys_proc: an optional function to get key arguments from a command.
79 * This is only used when the following three fields are not
80 * enough to specify what arguments are keys.
81 * first_key_index: first argument that is a key
82 * last_key_index: last argument that is a key
83 * key_step: step to get all the keys from first to last argument. For instance
84 * in MSET the step is two since arguments are key,val,key,val,...
85 * microseconds: microseconds of total execution time for this command.
86 * calls: total number of calls of this command.
87 *
88 * The flags, microseconds and calls fields are computed by Redis and should
89 * always be set to zero.
90 *
91 * Command flags are expressed using strings where every character represents
92 * a flag. Later the populateCommandTable() function will take care of
93 * populating the real 'flags' field using this characters.
94 *
95 * This is the meaning of the flags:
96 *
97 * w: write command (may modify the key space).
98 * r: read command (will never modify the key space).
99 * m: may increase memory usage once called. Don't allow if out of memory.
100 * a: admin command, like SAVE or SHUTDOWN.
101 * p: Pub/Sub related command.
102 * f: force replication of this command, regarless of server.dirty.
103 * s: command not allowed in scripts.
104 * R: random command. Command is not deterministic, that is, the same command
105 * with the same arguments, with the same key space, may have different
106 * results. For instance SPOP and RANDOMKEY are two random commands.
107 * S: Sort command output array if called from script, so that the output
108 * is deterministic.
109 */
110 struct redisCommand redisCommandTable[] = {
111 {"get",getCommand,2,"r",0,NULL,1,1,1,0,0},
112 {"set",setCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
113 {"setnx",setnxCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0},
114 {"setex",setexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
115 {"psetex",psetexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0},
116 {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
117 {"strlen",strlenCommand,2,"r",0,NULL,1,1,1,0,0},
118 {"del",delCommand,-2,"w",0,noPreloadGetKeys,1,-1,1,0,0},
119 {"exists",existsCommand,2,"r",0,NULL,1,1,1,0,0},
120 {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
121 {"getbit",getbitCommand,3,"r",0,NULL,1,1,1,0,0},
122 {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
123 {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
124 {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
125 {"incr",incrCommand,2,"wm",0,NULL,1,1,1,0,0},
126 {"decr",decrCommand,2,"wm",0,NULL,1,1,1,0,0},
127 {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
128 {"rpush",rpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
129 {"lpush",lpushCommand,-3,"wm",0,NULL,1,1,1,0,0},
130 {"rpushx",rpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
131 {"lpushx",lpushxCommand,3,"wm",0,NULL,1,1,1,0,0},
132 {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
133 {"rpop",rpopCommand,2,"w",0,NULL,1,1,1,0,0},
134 {"lpop",lpopCommand,2,"w",0,NULL,1,1,1,0,0},
135 {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
136 {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
137 {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
138 {"llen",llenCommand,2,"r",0,NULL,1,1,1,0,0},
139 {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
140 {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
141 {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
142 {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
143 {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
144 {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
145 {"sadd",saddCommand,-3,"wm",0,NULL,1,1,1,0,0},
146 {"srem",sremCommand,-3,"w",0,NULL,1,1,1,0,0},
147 {"smove",smoveCommand,4,"w",0,NULL,1,2,1,0,0},
148 {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
149 {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
150 {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
151 {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
152 {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
153 {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
154 {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
155 {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
156 {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
157 {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
158 {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
159 {"zadd",zaddCommand,-4,"wm",0,NULL,1,1,1,0,0},
160 {"zincrby",zincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
161 {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0},
162 {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
163 {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
164 {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
165 {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
166 {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
167 {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
168 {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
169 {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0},
170 {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
171 {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0},
172 {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0},
173 {"zrank",zrankCommand,3,"r",0,NULL,1,1,1,0,0},
174 {"zrevrank",zrevrankCommand,3,"r",0,NULL,1,1,1,0,0},
175 {"hset",hsetCommand,4,"wm",0,NULL,1,1,1,0,0},
176 {"hsetnx",hsetnxCommand,4,"wm",0,NULL,1,1,1,0,0},
177 {"hget",hgetCommand,3,"r",0,NULL,1,1,1,0,0},
178 {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
179 {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
180 {"hincrby",hincrbyCommand,4,"wm",0,NULL,1,1,1,0,0},
181 {"hincrbyfloat",hincrbyfloatCommand,4,"wm",0,NULL,1,1,1,0,0},
182 {"hdel",hdelCommand,-3,"w",0,NULL,1,1,1,0,0},
183 {"hlen",hlenCommand,2,"r",0,NULL,1,1,1,0,0},
184 {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
185 {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
186 {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
187 {"hexists",hexistsCommand,3,"r",0,NULL,1,1,1,0,0},
188 {"incrby",incrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
189 {"decrby",decrbyCommand,3,"wm",0,NULL,1,1,1,0,0},
190 {"incrbyfloat",incrbyfloatCommand,3,"wm",0,NULL,1,1,1,0,0},
191 {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
192 {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
193 {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
194 {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
195 {"select",selectCommand,2,"r",0,NULL,0,0,0,0,0},
196 {"move",moveCommand,3,"w",0,NULL,1,1,1,0,0},
197 {"rename",renameCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
198 {"renamenx",renamenxCommand,3,"w",0,renameGetKeys,1,2,1,0,0},
199 {"expire",expireCommand,3,"w",0,NULL,1,1,1,0,0},
200 {"expireat",expireatCommand,3,"w",0,NULL,1,1,1,0,0},
201 {"pexpire",pexpireCommand,3,"w",0,NULL,1,1,1,0,0},
202 {"pexpireat",pexpireatCommand,3,"w",0,NULL,1,1,1,0,0},
203 {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
204 {"dbsize",dbsizeCommand,1,"r",0,NULL,0,0,0,0,0},
205 {"auth",authCommand,2,"rs",0,NULL,0,0,0,0,0},
206 {"ping",pingCommand,1,"r",0,NULL,0,0,0,0,0},
207 {"echo",echoCommand,2,"r",0,NULL,0,0,0,0,0},
208 {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
209 {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
210 {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
211 {"shutdown",shutdownCommand,-1,"ar",0,NULL,0,0,0,0,0},
212 {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0},
213 {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0},
214 {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0},
215 {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0},
216 {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
217 {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
218 {"replconf",replconfCommand,-1,"ars",0,NULL,0,0,0,0,0},
219 {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
220 {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
221 {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0},
222 {"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0},
223 {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
224 {"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0},
225 {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0},
226 {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0},
227 {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0},
228 {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
229 {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0},
230 {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
231 {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
232 {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
233 {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
234 {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0},
235 {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0},
236 {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0},
237 {"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0},
238 {"migrate",migrateCommand,6,"aw",0,NULL,0,0,0,0,0},
239 {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
240 {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0},
241 {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0},
242 {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
243 {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
244 {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
245 {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
246 {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0},
247 {"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
248 {"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0}
249 };
250
251 /*============================ Utility functions ============================ */
252
253 /* Low level logging. To use only for very big messages, otherwise
254 * redisLog() is to prefer. */
255 void redisLogRaw(int level, const char *msg) {
256 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
257 const char *c = ".-*#";
258 FILE *fp;
259 char buf[64];
260 int rawmode = (level & REDIS_LOG_RAW);
261
262 level &= 0xff; /* clear flags */
263 if (level < server.verbosity) return;
264
265 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
266 if (!fp) return;
267
268 if (rawmode) {
269 fprintf(fp,"%s",msg);
270 } else {
271 int off;
272 struct timeval tv;
273
274 gettimeofday(&tv,NULL);
275 off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
276 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
277 fprintf(fp,"[%d] %s %c %s\n",(int)getpid(),buf,c[level],msg);
278 }
279 fflush(fp);
280
281 if (server.logfile) fclose(fp);
282
283 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
284 }
285
286 /* Like redisLogRaw() but with printf-alike support. This is the funciton that
287 * is used across the code. The raw version is only used in order to dump
288 * the INFO output on crash. */
289 void redisLog(int level, const char *fmt, ...) {
290 va_list ap;
291 char msg[REDIS_MAX_LOGMSG_LEN];
292
293 if ((level&0xff) < server.verbosity) return;
294
295 va_start(ap, fmt);
296 vsnprintf(msg, sizeof(msg), fmt, ap);
297 va_end(ap);
298
299 redisLogRaw(level,msg);
300 }
301
302 /* Log a fixed message without printf-alike capabilities, in a way that is
303 * safe to call from a signal handler.
304 *
305 * We actually use this only for signals that are not fatal from the point
306 * of view of Redis. Signals that are going to kill the server anyway and
307 * where we need printf-alike features are served by redisLog(). */
308 void redisLogFromHandler(int level, const char *msg) {
309 int fd;
310 char buf[64];
311
312 if ((level&0xff) < server.verbosity ||
313 (server.logfile == NULL && server.daemonize)) return;
314 fd = server.logfile ?
315 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644) :
316 STDOUT_FILENO;
317 if (fd == -1) return;
318 ll2string(buf,sizeof(buf),getpid());
319 if (write(fd,"[",1) == -1) goto err;
320 if (write(fd,buf,strlen(buf)) == -1) goto err;
321 if (write(fd," | signal handler] (",20) == -1) goto err;
322 ll2string(buf,sizeof(buf),time(NULL));
323 if (write(fd,buf,strlen(buf)) == -1) goto err;
324 if (write(fd,") ",2) == -1) goto err;
325 if (write(fd,msg,strlen(msg)) == -1) goto err;
326 if (write(fd,"\n",1) == -1) goto err;
327 err:
328 if (server.logfile) close(fd);
329 }
330
331 /* Redis generally does not try to recover from out of memory conditions
332 * when allocating objects or strings, it is not clear if it will be possible
333 * to report this condition to the client since the networking layer itself
334 * is based on heap allocation for send buffers, so we simply abort.
335 * At least the code will be simpler to read... */
336 void oom(const char *msg) {
337 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
338 sleep(1);
339 abort();
340 }
341
/* Return the UNIX time in microseconds, as reported by gettimeofday(). */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    /* Widen the seconds before scaling them to microseconds. */
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
352
/* Return the UNIX time in milliseconds: ustime() scaled down by 1000. */
long long mstime(void) {
    long long usec = ustime();

    return usec/1000;
}
357
/* Terminate a child process with the given exit code.
 *
 * After an RDB dump or AOF rewrite children exit using _exit() instead of
 * exit(), because the latter may interact with the same file objects used
 * by the parent process. However when testing the coverage the normal
 * exit() is used in order to obtain the right coverage information. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
369
370 /*====================== Hash table type implementation ==================== */
371
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and redis objects as values (objects can hold SDS strings,
 * lists, sets). */
375
/* Dict destructor callback releasing 'val' with zfree(). The private data
 * of the dict is not used. */
void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    zfree(val);
}
381
382 void dictListDestructor(void *privdata, void *val)
383 {
384 DICT_NOTUSED(privdata);
385 listRelease((list*)val);
386 }
387
388 int dictSdsKeyCompare(void *privdata, const void *key1,
389 const void *key2)
390 {
391 int l1,l2;
392 DICT_NOTUSED(privdata);
393
394 l1 = sdslen((sds)key1);
395 l2 = sdslen((sds)key2);
396 if (l1 != l2) return 0;
397 return memcmp(key1, key2, l1) == 0;
398 }
399
/* Case insensitive key comparison, used for the command lookup table.
 * Returns 1 if strcasecmp() reports the two keys as equal, 0 otherwise. */
int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2)
{
    DICT_NOTUSED(privdata);

    return !strcasecmp(key1, key2);
}
408
/* Dict destructor callback for Redis objects: drops one reference with
 * decrRefCount(). NULL values are ignored (values of swapped out keys are
 * set to NULL). */
void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
416
/* Dict destructor callback for sds strings: 'val' is released with
 * sdsfree(). The private data of the dict is not used. */
void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    sdsfree(val);
}
423
424 int dictObjKeyCompare(void *privdata, const void *key1,
425 const void *key2)
426 {
427 const robj *o1 = key1, *o2 = key2;
428 return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
429 }
430
431 unsigned int dictObjHash(const void *key) {
432 const robj *o = key;
433 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
434 }
435
/* Dict hash callback for sds keys: hashes the whole sds content with
 * dictGenHashFunction(). */
unsigned int dictSdsHash(const void *key) {
    size_t len = sdslen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
439
/* Dict hash callback for sds keys that uses dictGenCaseHashFunction() over
 * the whole sds content. It is paired with dictSdsKeyCaseCompare() in the
 * command table dict type. */
unsigned int dictSdsCaseHash(const void *key) {
    size_t len = sdslen((char*)key);

    return dictGenCaseHashFunction((unsigned char*)key, len);
}
443
444 int dictEncObjKeyCompare(void *privdata, const void *key1,
445 const void *key2)
446 {
447 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
448 int cmp;
449
450 if (o1->encoding == REDIS_ENCODING_INT &&
451 o2->encoding == REDIS_ENCODING_INT)
452 return o1->ptr == o2->ptr;
453
454 o1 = getDecodedObject(o1);
455 o2 = getDecodedObject(o2);
456 cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
457 decrRefCount(o1);
458 decrRefCount(o2);
459 return cmp;
460 }
461
462 unsigned int dictEncObjHash(const void *key) {
463 robj *o = (robj*) key;
464
465 if (o->encoding == REDIS_ENCODING_RAW) {
466 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
467 } else {
468 if (o->encoding == REDIS_ENCODING_INT) {
469 char buf[32];
470 int len;
471
472 len = ll2string(buf,32,(long)o->ptr);
473 return dictGenHashFunction((unsigned char*)buf, len);
474 } else {
475 unsigned int hash;
476
477 o = getDecodedObject(o);
478 hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
479 decrRefCount(o);
480 return hash;
481 }
482 }
483 }
484
485 /* Sets type hash table */
486 dictType setDictType = {
487 dictEncObjHash, /* hash function */
488 NULL, /* key dup */
489 NULL, /* val dup */
490 dictEncObjKeyCompare, /* key compare */
491 dictRedisObjectDestructor, /* key destructor */
492 NULL /* val destructor */
493 };
494
495 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
496 dictType zsetDictType = {
497 dictEncObjHash, /* hash function */
498 NULL, /* key dup */
499 NULL, /* val dup */
500 dictEncObjKeyCompare, /* key compare */
501 dictRedisObjectDestructor, /* key destructor */
502 NULL /* val destructor */
503 };
504
505 /* Db->dict, keys are sds strings, vals are Redis objects. */
506 dictType dbDictType = {
507 dictSdsHash, /* hash function */
508 NULL, /* key dup */
509 NULL, /* val dup */
510 dictSdsKeyCompare, /* key compare */
511 dictSdsDestructor, /* key destructor */
512 dictRedisObjectDestructor /* val destructor */
513 };
514
515 /* Db->expires */
516 dictType keyptrDictType = {
517 dictSdsHash, /* hash function */
518 NULL, /* key dup */
519 NULL, /* val dup */
520 dictSdsKeyCompare, /* key compare */
521 NULL, /* key destructor */
522 NULL /* val destructor */
523 };
524
525 /* Command table. sds string -> command struct pointer. */
526 dictType commandTableDictType = {
527 dictSdsCaseHash, /* hash function */
528 NULL, /* key dup */
529 NULL, /* val dup */
530 dictSdsKeyCaseCompare, /* key compare */
531 dictSdsDestructor, /* key destructor */
532 NULL /* val destructor */
533 };
534
535 /* Hash type hash table (note that small hashes are represented with zimpaps) */
536 dictType hashDictType = {
537 dictEncObjHash, /* hash function */
538 NULL, /* key dup */
539 NULL, /* val dup */
540 dictEncObjKeyCompare, /* key compare */
541 dictRedisObjectDestructor, /* key destructor */
542 dictRedisObjectDestructor /* val destructor */
543 };
544
545 /* Keylist hash table type has unencoded redis objects as keys and
546 * lists as values. It's used for blocking operations (BLPOP) and to
547 * map swapped keys to a list of clients waiting for this keys to be loaded. */
548 dictType keylistDictType = {
549 dictObjHash, /* hash function */
550 NULL, /* key dup */
551 NULL, /* val dup */
552 dictObjKeyCompare, /* key compare */
553 dictRedisObjectDestructor, /* key destructor */
554 dictListDestructor /* val destructor */
555 };
556
557 int htNeedsResize(dict *dict) {
558 long long size, used;
559
560 size = dictSlots(dict);
561 used = dictSize(dict);
562 return (size && used && size > DICT_HT_INITIAL_SIZE &&
563 (used*100/size < REDIS_HT_MINFILL));
564 }
565
566 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
567 * we resize the hash table to save memory */
568 void tryResizeHashTables(void) {
569 int j;
570
571 for (j = 0; j < server.dbnum; j++) {
572 if (htNeedsResize(server.db[j].dict))
573 dictResize(server.db[j].dict);
574 if (htNeedsResize(server.db[j].expires))
575 dictResize(server.db[j].expires);
576 }
577 }
578
579 /* Our hash table implementation performs rehashing incrementally while
580 * we write/read from the hash table. Still if the server is idle, the hash
581 * table will use two tables for a long time. So we try to use 1 millisecond
582 * of CPU time at every serverCron() loop in order to rehash some key. */
583 void incrementallyRehash(void) {
584 int j;
585
586 for (j = 0; j < server.dbnum; j++) {
587 /* Keys dictionary */
588 if (dictIsRehashing(server.db[j].dict)) {
589 dictRehashMilliseconds(server.db[j].dict,1);
590 break; /* already used our millisecond for this loop... */
591 }
592 /* Expires */
593 if (dictIsRehashing(server.db[j].expires)) {
594 dictRehashMilliseconds(server.db[j].expires,1);
595 break; /* already used our millisecond for this loop... */
596 }
597 }
598 }
599
600 /* This function is called once a background process of some kind terminates,
601 * as we want to avoid resizing the hash tables when there is a child in order
602 * to play well with copy-on-write (otherwise when a resize happens lots of
603 * memory pages are copied). The goal of this function is to update the ability
604 * for dict.c to resize the hash tables accordingly to the fact we have o not
605 * running childs. */
606 void updateDictResizePolicy(void) {
607 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
608 dictEnableResize();
609 else
610 dictDisableResize();
611 }
612
613 /* ======================= Cron: called every 100 ms ======================== */
614
615 /* Try to expire a few timed out keys. The algorithm used is adaptive and
616 * will use few CPU cycles if there are few expiring keys, otherwise
617 * it will get more aggressive to avoid that too much memory is used by
618 * keys that can be removed from the keyspace. */
619 void activeExpireCycle(void) {
620 int j, iteration = 0;
621 long long start = ustime(), timelimit;
622
623 /* We can use at max REDIS_EXPIRELOOKUPS_TIME_PERC percentage of CPU time
624 * per iteration. Since this function gets called with a frequency of
625 * REDIS_HZ times per second, the following is the max amount of
626 * microseconds we can spend in this function. */
627 timelimit = 1000000*REDIS_EXPIRELOOKUPS_TIME_PERC/REDIS_HZ/100;
628 if (timelimit <= 0) timelimit = 1;
629
630 for (j = 0; j < server.dbnum; j++) {
631 int expired;
632 redisDb *db = server.db+j;
633
634 /* Continue to expire if at the end of the cycle more than 25%
635 * of the keys were expired. */
636 do {
637 unsigned long num = dictSize(db->expires);
638 unsigned long slots = dictSlots(db->expires);
639 long long now = mstime();
640
641 /* When there are less than 1% filled slots getting random
642 * keys is expensive, so stop here waiting for better times...
643 * The dictionary will be resized asap. */
644 if (num && slots > DICT_HT_INITIAL_SIZE &&
645 (num*100/slots < 1)) break;
646
647 /* The main collection cycle. Sample random keys among keys
648 * with an expire set, checking for expired ones. */
649 expired = 0;
650 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
651 num = REDIS_EXPIRELOOKUPS_PER_CRON;
652 while (num--) {
653 dictEntry *de;
654 long long t;
655
656 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
657 t = dictGetSignedIntegerVal(de);
658 if (now > t) {
659 sds key = dictGetKey(de);
660 robj *keyobj = createStringObject(key,sdslen(key));
661
662 propagateExpire(db,keyobj);
663 dbDelete(db,keyobj);
664 decrRefCount(keyobj);
665 expired++;
666 server.stat_expiredkeys++;
667 }
668 }
669 /* We can't block forever here even if there are many keys to
670 * expire. So after a given amount of milliseconds return to the
671 * caller waiting for the other active expire cycle. */
672 iteration++;
673 if ((iteration & 0xf) == 0 && /* check once every 16 cycles. */
674 (ustime()-start) > timelimit) return;
675 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
676 }
677 }
678
679 void updateLRUClock(void) {
680 server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) &
681 REDIS_LRU_CLOCK_MAX;
682 }
683
684
685 /* Add a sample to the operations per second array of samples. */
686 void trackOperationsPerSecond(void) {
687 long long t = mstime() - server.ops_sec_last_sample_time;
688 long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
689 long long ops_sec;
690
691 ops_sec = t > 0 ? (ops*1000/t) : 0;
692
693 server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
694 server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
695 server.ops_sec_last_sample_time = mstime();
696 server.ops_sec_last_sample_ops = server.stat_numcommands;
697 }
698
699 /* Return the mean of all the samples. */
700 long long getOperationsPerSecond(void) {
701 int j;
702 long long sum = 0;
703
704 for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
705 sum += server.ops_sec_samples[j];
706 return sum / REDIS_OPS_SEC_SAMPLES;
707 }
708
709 /* Check for timeouts. Returns non-zero if the client was terminated */
710 int clientsCronHandleTimeout(redisClient *c) {
711 time_t now = server.unixtime;
712
713 if (server.maxidletime &&
714 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
715 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
716 !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
717 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
718 listLength(c->pubsub_patterns) == 0 &&
719 (now - c->lastinteraction > server.maxidletime))
720 {
721 redisLog(REDIS_VERBOSE,"Closing idle client");
722 freeClient(c);
723 return 1;
724 } else if (c->flags & REDIS_BLOCKED) {
725 if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
726 addReply(c,shared.nullmultibulk);
727 unblockClientWaitingData(c);
728 }
729 }
730 return 0;
731 }
732
733 /* The client query buffer is an sds.c string that can end with a lot of
734 * free space not used, this function reclaims space if needed.
735 *
736 * The funciton always returns 0 as it never terminates the client. */
737 int clientsCronResizeQueryBuffer(redisClient *c) {
738 size_t querybuf_size = sdsAllocSize(c->querybuf);
739 time_t idletime = server.unixtime - c->lastinteraction;
740
741 /* There are two conditions to resize the query buffer:
742 * 1) Query buffer is > BIG_ARG and too big for latest peak.
743 * 2) Client is inactive and the buffer is bigger than 1k. */
744 if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
745 (querybuf_size/(c->querybuf_peak+1)) > 2) ||
746 (querybuf_size > 1024 && idletime > 2))
747 {
748 /* Only resize the query buffer if it is actually wasting space. */
749 if (sdsavail(c->querybuf) > 1024) {
750 c->querybuf = sdsRemoveFreeSpace(c->querybuf);
751 }
752 }
753 /* Reset the peak again to capture the peak memory usage in the next
754 * cycle. */
755 c->querybuf_peak = 0;
756 return 0;
757 }
758
759 void clientsCron(void) {
760 /* Make sure to process at least 1/(REDIS_HZ*10) of clients per call.
761 * Since this function is called REDIS_HZ times per second we are sure that
762 * in the worst case we process all the clients in 10 seconds.
763 * In normal conditions (a reasonable number of clients) we process
764 * all the clients in a shorter time. */
765 int numclients = listLength(server.clients);
766 int iterations = numclients/(REDIS_HZ*10);
767
768 if (iterations < 50)
769 iterations = (numclients < 50) ? numclients : 50;
770 while(listLength(server.clients) && iterations--) {
771 redisClient *c;
772 listNode *head;
773
774 /* Rotate the list, take the current head, process.
775 * This way if the client must be removed from the list it's the
776 * first element and we don't incur into O(N) computation. */
777 listRotate(server.clients);
778 head = listFirst(server.clients);
779 c = listNodeValue(head);
780 /* The following functions do different service checks on the client.
781 * The protocol is that they return non-zero if the client was
782 * terminated. */
783 if (clientsCronHandleTimeout(c)) continue;
784 if (clientsCronResizeQueryBuffer(c)) continue;
785 }
786 }
787
/* This is our timer interrupt, called REDIS_HZ times per second.
 * Here is where we do a number of things that need to be done asynchronously.
 * For instance:
 *
 * - Active expired keys collection (it is also performed in a lazy way on
 *   lookup).
 * - Software watchdog.
 * - Update some statistics.
 * - Incremental rehashing of the DBs hash tables.
 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
 * - Clients timeout of different kinds.
 * - Replication reconnection.
 * - Many more...
 *
 * Everything directly called here will be called REDIS_HZ times per second,
 * so in order to throttle execution of things we want to do less frequently
 * a macro is used: run_with_period(milliseconds) { .... }
 */
806
/* Using the following macro you can run code inside serverCron() with the
 * specified period, specified in milliseconds.
 * The actual resolution depends on REDIS_HZ.
 *
 * The macro expands to an 'if' header that must be followed by a statement
 * or block, and expects an integer variable called 'loops' (the number of
 * cron iterations so far) to be in scope.
 *
 * A period not longer than one cron tick (1000/REDIS_HZ milliseconds) means
 * "run at every call". The explicit test is needed because for such periods
 * (_ms_)/(1000/REDIS_HZ) is zero, and a modulo by zero is undefined
 * behavior. REDIS_HZ itself must not exceed 1000. */
#define run_with_period(_ms_) \
    if (((_ms_) <= 1000/REDIS_HZ) || !(loops % ((_ms_)/(1000/REDIS_HZ))))
811
812 int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
813 int j, loops = server.cronloops;
814 REDIS_NOTUSED(eventLoop);
815 REDIS_NOTUSED(id);
816 REDIS_NOTUSED(clientData);
817
818 /* Software watchdog: deliver the SIGALRM that will reach the signal
819 * handler if we don't return here fast enough. */
820 if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
821
822 /* We take a cached value of the unix time in the global state because
823 * with virtual memory and aging there is to store the current time
824 * in objects at every object access, and accuracy is not needed.
825 * To access a global var is faster than calling time(NULL) */
826 server.unixtime = time(NULL);
827
828 run_with_period(100) trackOperationsPerSecond();
829
830 /* We have just 22 bits per object for LRU information.
831 * So we use an (eventually wrapping) LRU clock with 10 seconds resolution.
832 * 2^22 bits with 10 seconds resoluton is more or less 1.5 years.
833 *
834 * Note that even if this will wrap after 1.5 years it's not a problem,
835 * everything will still work but just some object will appear younger
836 * to Redis. But for this to happen a given object should never be touched
837 * for 1.5 years.
838 *
839 * Note that you can change the resolution altering the
840 * REDIS_LRU_CLOCK_RESOLUTION define.
841 */
842 updateLRUClock();
843
844 /* Record the max memory used since the server was started. */
845 if (zmalloc_used_memory() > server.stat_peak_memory)
846 server.stat_peak_memory = zmalloc_used_memory();
847
848 /* We received a SIGTERM, shutting down here in a safe way, as it is
849 * not ok doing so inside the signal handler. */
850 if (server.shutdown_asap) {
851 if (prepareForShutdown(0) == REDIS_OK) exit(0);
852 redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
853 }
854
855 /* Show some info about non-empty databases */
856 run_with_period(5000) {
857 for (j = 0; j < server.dbnum; j++) {
858 long long size, used, vkeys;
859
860 size = dictSlots(server.db[j].dict);
861 used = dictSize(server.db[j].dict);
862 vkeys = dictSize(server.db[j].expires);
863 if (used || vkeys) {
864 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
865 /* dictPrintStats(server.dict); */
866 }
867 }
868 }
869
870 /* We don't want to resize the hash tables while a bacground saving
871 * is in progress: the saving child is created using fork() that is
872 * implemented with a copy-on-write semantic in most modern systems, so
873 * if we resize the HT while there is the saving child at work actually
874 * a lot of memory movements in the parent will cause a lot of pages
875 * copied. */
876 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
877 tryResizeHashTables();
878 if (server.activerehashing) incrementallyRehash();
879 }
880
881 /* Show information about connected clients */
882 run_with_period(5000) {
883 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
884 listLength(server.clients)-listLength(server.slaves),
885 listLength(server.slaves),
886 zmalloc_used_memory());
887 }
888
889 /* We need to do a few operations on clients asynchronously. */
890 clientsCron();
891
892 /* Start a scheduled AOF rewrite if this was requested by the user while
893 * a BGSAVE was in progress. */
894 if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
895 server.aof_rewrite_scheduled)
896 {
897 rewriteAppendOnlyFileBackground();
898 }
899
900 /* Check if a background saving or AOF rewrite in progress terminated. */
901 if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
902 int statloc;
903 pid_t pid;
904
905 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
906 int exitcode = WEXITSTATUS(statloc);
907 int bysignal = 0;
908
909 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
910
911 if (pid == server.rdb_child_pid) {
912 backgroundSaveDoneHandler(exitcode,bysignal);
913 } else {
914 backgroundRewriteDoneHandler(exitcode,bysignal);
915 }
916 updateDictResizePolicy();
917 }
918 } else {
919 /* If there is not a background saving/rewrite in progress check if
920 * we have to save/rewrite now */
921 for (j = 0; j < server.saveparamslen; j++) {
922 struct saveparam *sp = server.saveparams+j;
923
924 if (server.dirty >= sp->changes &&
925 server.unixtime-server.lastsave > sp->seconds) {
926 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
927 sp->changes, sp->seconds);
928 rdbSaveBackground(server.rdb_filename);
929 break;
930 }
931 }
932
933 /* Trigger an AOF rewrite if needed */
934 if (server.rdb_child_pid == -1 &&
935 server.aof_child_pid == -1 &&
936 server.aof_rewrite_perc &&
937 server.aof_current_size > server.aof_rewrite_min_size)
938 {
939 long long base = server.aof_rewrite_base_size ?
940 server.aof_rewrite_base_size : 1;
941 long long growth = (server.aof_current_size*100/base) - 100;
942 if (growth >= server.aof_rewrite_perc) {
943 redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
944 rewriteAppendOnlyFileBackground();
945 }
946 }
947 }
948
949
950 /* If we postponed an AOF buffer flush, let's try to do it every time the
951 * cron function is called. */
952 if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
953
954 /* Expire a few keys per cycle, only if this is a master.
955 * On slaves we wait for DEL operations synthesized by the master
956 * in order to guarantee a strict consistency. */
957 if (server.masterhost == NULL) activeExpireCycle();
958
959 /* Close clients that need to be closed asynchronous */
960 freeClientsInAsyncFreeQueue();
961
962 /* Replication cron function -- used to reconnect to master and
963 * to detect transfer failures. */
964 run_with_period(1000) replicationCron();
965
966 server.cronloops++;
967 return 1000/REDIS_HZ;
968 }
969
970 /* This function gets called every time Redis is entering the
971 * main loop of the event driven library, that is, before to sleep
972 * for ready file descriptors. */
973 void beforeSleep(struct aeEventLoop *eventLoop) {
974 REDIS_NOTUSED(eventLoop);
975 listNode *ln;
976 redisClient *c;
977
978 /* Try to process pending commands for clients that were just unblocked. */
979 while (listLength(server.unblocked_clients)) {
980 ln = listFirst(server.unblocked_clients);
981 redisAssert(ln != NULL);
982 c = ln->value;
983 listDelNode(server.unblocked_clients,ln);
984 c->flags &= ~REDIS_UNBLOCKED;
985
986 /* Process remaining data in the input buffer. */
987 if (c->querybuf && sdslen(c->querybuf) > 0) {
988 server.current_client = c;
989 processInputBuffer(c);
990 server.current_client = NULL;
991 }
992 }
993
994 /* Write the AOF buffer on disk */
995 flushAppendOnlyFile(0);
996 }
997
998 /* =========================== Server initialization ======================== */
999
1000 void createSharedObjects(void) {
1001 int j;
1002
1003 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1004 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1005 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
1006 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1007 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1008 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1009 shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
1010 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1011 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1012 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
1013 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1014 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
1015 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1016 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1017 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1018 "-ERR no such key\r\n"));
1019 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1020 "-ERR syntax error\r\n"));
1021 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1022 "-ERR source and destination objects are the same\r\n"));
1023 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1024 "-ERR index out of range\r\n"));
1025 shared.noscripterr = createObject(REDIS_STRING,sdsnew(
1026 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
1027 shared.loadingerr = createObject(REDIS_STRING,sdsnew(
1028 "-LOADING Redis is loading the dataset in memory\r\n"));
1029 shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
1030 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
1031 shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
1032 "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
1033 shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
1034 "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
1035 shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
1036 "-READONLY You can't write against a read only slave.\r\n"));
1037 shared.oomerr = createObject(REDIS_STRING,sdsnew(
1038 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
1039 shared.space = createObject(REDIS_STRING,sdsnew(" "));
1040 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1041 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1042
1043 for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
1044 shared.select[j] = createObject(REDIS_STRING,
1045 sdscatprintf(sdsempty(),"select %d\r\n", j));
1046 }
1047 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1048 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1049 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1050 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1051 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1052 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1053 shared.del = createStringObject("DEL",3);
1054 shared.rpop = createStringObject("RPOP",4);
1055 shared.lpop = createStringObject("LPOP",4);
1056 for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
1057 shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
1058 shared.integers[j]->encoding = REDIS_ENCODING_INT;
1059 }
1060 for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
1061 shared.mbulkhdr[j] = createObject(REDIS_STRING,
1062 sdscatprintf(sdsempty(),"*%d\r\n",j));
1063 shared.bulkhdr[j] = createObject(REDIS_STRING,
1064 sdscatprintf(sdsempty(),"$%d\r\n",j));
1065 }
1066 }
1067
/* Fill the global 'server' structure with the built-in default
 * configuration: run id, networking, logging, persistence (AOF and RDB),
 * memory limits, data structure encoding thresholds, scripting,
 * replication, client output buffer limits, the command table, slow log
 * and debugging fields. It also installs the default save points and
 * initializes the floating point constants R_Zero, R_PosInf, R_NegInf and
 * R_Nan. */
void initServerConfig() {
    /* Random run id, NUL terminated. */
    getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
    server.runid[REDIS_RUN_ID_SIZE] = '\0';
    server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
    /* Networking and general settings */
    server.port = REDIS_SERVERPORT;
    server.bindaddr = NULL;
    server.unixsocket = NULL;
    server.unixsocketperm = 0;
    server.ipfd = -1;
    server.sofd = -1;
    server.dbnum = REDIS_DEFAULT_DBNUM;
    server.verbosity = REDIS_NOTICE;
    server.maxidletime = REDIS_MAXIDLETIME;
    server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
    server.saveparams = NULL;
    server.loading = 0;
    /* Logging */
    server.logfile = NULL; /* NULL = log on standard output */
    server.syslog_enabled = 0;
    server.syslog_ident = zstrdup("redis");
    server.syslog_facility = LOG_LOCAL0;
    server.daemonize = 0;
    /* Append only file */
    server.aof_state = REDIS_AOF_OFF;
    server.aof_fsync = AOF_FSYNC_EVERYSEC;
    server.aof_no_fsync_on_rewrite = 0;
    server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
    server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
    server.aof_rewrite_base_size = 0;
    server.aof_rewrite_scheduled = 0;
    server.aof_last_fsync = time(NULL);
    server.aof_rewrite_time_last = -1;
    server.aof_rewrite_time_start = -1;
    server.aof_delayed_fsync = 0;
    server.aof_fd = -1;
    server.aof_selected_db = -1; /* Make sure the first time will not match */
    server.aof_flush_postponed_start = 0;
    /* File names, authentication and RDB options */
    server.pidfile = zstrdup("/var/run/redis.pid");
    server.rdb_filename = zstrdup("dump.rdb");
    server.aof_filename = zstrdup("appendonly.aof");
    server.requirepass = NULL;
    server.rdb_compression = 1;
    server.rdb_checksum = 1;
    server.activerehashing = 1;
    /* Clients and memory limits */
    server.maxclients = REDIS_MAX_CLIENTS;
    server.bpop_blocked_clients = 0;
    server.maxmemory = 0;
    server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
    server.maxmemory_samples = 3;
    /* Thresholds for the compact encodings of the data types */
    server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
    server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
    server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
    server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
    server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
    server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
    server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
    server.shutdown_asap = 0;
    server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
    server.repl_timeout = REDIS_REPL_TIMEOUT;
    /* Lua scripting */
    server.lua_caller = NULL;
    server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
    server.lua_client = NULL;
    server.lua_timedout = 0;

    updateLRUClock();
    /* Start from an empty save points list, then add the defaults. */
    resetServerSaveParams();

    appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
    appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
    appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
    /* Replication related */
    server.masterauth = NULL;
    server.masterhost = NULL;
    server.masterport = 6379;
    server.master = NULL;
    server.repl_state = REDIS_REPL_NONE;
    server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
    server.repl_serve_stale_data = 1;
    server.repl_slave_ro = 1;
    server.repl_down_since = time(NULL);

    /* Client output buffer limits */
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_bytes = 0;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_seconds = 0;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].hard_limit_bytes = 1024*1024*256;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_bytes = 1024*1024*64;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_seconds = 60;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].hard_limit_bytes = 1024*1024*32;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_bytes = 1024*1024*8;
    server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_seconds = 60;

    /* Double constants initialization. The infinities and the NaN are
     * obtained at runtime through floating point divisions by R_Zero. */
    R_Zero = 0.0;
    R_PosInf = 1.0/R_Zero;
    R_NegInf = -1.0/R_Zero;
    R_Nan = R_Zero/R_Zero;

    /* Command table -- we initialize it here as it is part of the
     * initial configuration, since command names may be changed via
     * redis.conf using the rename-command directive. */
    server.commands = dictCreate(&commandTableDictType,NULL);
    populateCommandTable();
    /* Cache pointers to a few commands looked up by name. */
    server.delCommand = lookupCommandByCString("del");
    server.multiCommand = lookupCommandByCString("multi");
    server.lpushCommand = lookupCommandByCString("lpush");

    /* Slow log */
    server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
    server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;

    /* Debugging */
    server.assert_failed = "<no assertion failed>";
    server.assert_file = "<no file>";
    server.assert_line = 0;
    server.bug_report_start = 0;
    server.watchdog_period = 0;
}
1184
1185 /* This function will try to raise the max number of open files accordingly to
1186 * the configured max number of clients. It will also account for 32 additional
1187 * file descriptors as we need a few more for persistence, listening
1188 * sockets, log files and so forth.
1189 *
1190 * If it will not be possible to set the limit accordingly to the configured
1191 * max number of clients, the function will do the reverse setting
1192 * server.maxclients to the value that we can actually handle. */
1193 void adjustOpenFilesLimit(void) {
1194 rlim_t maxfiles = server.maxclients+32;
1195 struct rlimit limit;
1196
1197 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
1198 redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
1199 strerror(errno));
1200 server.maxclients = 1024-32;
1201 } else {
1202 rlim_t oldlimit = limit.rlim_cur;
1203
1204 /* Set the max number of files if the current limit is not enough
1205 * for our needs. */
1206 if (oldlimit < maxfiles) {
1207 rlim_t f;
1208
1209 f = maxfiles;
1210 while(f > oldlimit) {
1211 limit.rlim_cur = f;
1212 limit.rlim_max = f;
1213 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
1214 f -= 128;
1215 }
1216 if (f < oldlimit) f = oldlimit;
1217 if (f != maxfiles) {
1218 server.maxclients = f-32;
1219 redisLog(REDIS_WARNING,"Unable to set the max number of files limit to %d (%s), setting the max clients configuration to %d.",
1220 (int) maxfiles, strerror(errno), (int) server.maxclients);
1221 } else {
1222 redisLog(REDIS_NOTICE,"Max number of open files set to %d",
1223 (int) maxfiles);
1224 }
1225 }
1226 }
1227 }
1228
1229 void initServer() {
1230 int j;
1231
1232 signal(SIGHUP, SIG_IGN);
1233 signal(SIGPIPE, SIG_IGN);
1234 setupSignalHandlers();
1235
1236 if (server.syslog_enabled) {
1237 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
1238 server.syslog_facility);
1239 }
1240
1241 server.current_client = NULL;
1242 server.clients = listCreate();
1243 server.clients_to_close = listCreate();
1244 server.slaves = listCreate();
1245 server.monitors = listCreate();
1246 server.unblocked_clients = listCreate();
1247
1248 createSharedObjects();
1249 adjustOpenFilesLimit();
1250 server.el = aeCreateEventLoop(server.maxclients+1024);
1251 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1252
1253 if (server.port != 0) {
1254 server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr);
1255 if (server.ipfd == ANET_ERR) {
1256 redisLog(REDIS_WARNING, "Opening port %d: %s",
1257 server.port, server.neterr);
1258 exit(1);
1259 }
1260 }
1261 if (server.unixsocket != NULL) {
1262 unlink(server.unixsocket); /* don't care if this fails */
1263 server.sofd = anetUnixServer(server.neterr,server.unixsocket,server.unixsocketperm);
1264 if (server.sofd == ANET_ERR) {
1265 redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
1266 exit(1);
1267 }
1268 }
1269 if (server.ipfd < 0 && server.sofd < 0) {
1270 redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
1271 exit(1);
1272 }
1273 for (j = 0; j < server.dbnum; j++) {
1274 server.db[j].dict = dictCreate(&dbDictType,NULL);
1275 server.db[j].expires = dictCreate(&keyptrDictType,NULL);
1276 server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1277 server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
1278 server.db[j].id = j;
1279 }
1280 server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1281 server.pubsub_patterns = listCreate();
1282 listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1283 listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
1284 server.cronloops = 0;
1285 server.rdb_child_pid = -1;
1286 server.aof_child_pid = -1;
1287 aofRewriteBufferReset();
1288 server.aof_buf = sdsempty();
1289 server.lastsave = time(NULL);
1290 server.rdb_save_time_last = -1;
1291 server.rdb_save_time_start = -1;
1292 server.dirty = 0;
1293 server.stat_numcommands = 0;
1294 server.stat_numconnections = 0;
1295 server.stat_expiredkeys = 0;
1296 server.stat_evictedkeys = 0;
1297 server.stat_starttime = time(NULL);
1298 server.stat_keyspace_misses = 0;
1299 server.stat_keyspace_hits = 0;
1300 server.stat_peak_memory = 0;
1301 server.stat_fork_time = 0;
1302 server.stat_rejected_conn = 0;
1303 memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
1304 server.ops_sec_idx = 0;
1305 server.ops_sec_last_sample_time = mstime();
1306 server.ops_sec_last_sample_ops = 0;
1307 server.unixtime = time(NULL);
1308 server.lastbgsave_status = REDIS_OK;
1309 server.stop_writes_on_bgsave_err = 1;
1310 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1311 if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE,
1312 acceptTcpHandler,NULL) == AE_ERR) oom("creating file event");
1313 if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
1314 acceptUnixHandler,NULL) == AE_ERR) oom("creating file event");
1315
1316 if (server.aof_state == REDIS_AOF_ON) {
1317 server.aof_fd = open(server.aof_filename,
1318 O_WRONLY|O_APPEND|O_CREAT,0644);
1319 if (server.aof_fd == -1) {
1320 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1321 strerror(errno));
1322 exit(1);
1323 }
1324 }
1325
1326 /* 32 bit instances are limited to 4GB of address space, so if there is
1327 * no explicit limit in the user provided configuration we set a limit
1328 * at 3.5GB using maxmemory with 'noeviction' policy'. This saves
1329 * useless crashes of the Redis instance. */
1330 if (server.arch_bits == 32 && server.maxmemory == 0) {
1331 redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3.5 GB maxmemory limit with 'noeviction' policy now.");
1332 server.maxmemory = 3584LL*(1024*1024); /* 3584 MB = 3.5 GB */
1333 server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
1334 }
1335
1336 scriptingInit();
1337 slowlogInit();
1338 bioInit();
1339 }
1340
1341 /* Populates the Redis Command Table starting from the hard coded list
1342 * we have on top of redis.c file. */
1343 void populateCommandTable(void) {
1344 int j;
1345 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1346
1347 for (j = 0; j < numcommands; j++) {
1348 struct redisCommand *c = redisCommandTable+j;
1349 char *f = c->sflags;
1350 int retval;
1351
1352 while(*f != '\0') {
1353 switch(*f) {
1354 case 'w': c->flags |= REDIS_CMD_WRITE; break;
1355 case 'r': c->flags |= REDIS_CMD_READONLY; break;
1356 case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
1357 case 'a': c->flags |= REDIS_CMD_ADMIN; break;
1358 case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
1359 case 'f': c->flags |= REDIS_CMD_FORCE_REPLICATION; break;
1360 case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
1361 case 'R': c->flags |= REDIS_CMD_RANDOM; break;
1362 case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
1363 default: redisPanic("Unsupported command flag"); break;
1364 }
1365 f++;
1366 }
1367
1368 retval = dictAdd(server.commands, sdsnew(c->name), c);
1369 assert(retval == DICT_OK);
1370 }
1371 }
1372
1373 void resetCommandTableStats(void) {
1374 int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1375 int j;
1376
1377 for (j = 0; j < numcommands; j++) {
1378 struct redisCommand *c = redisCommandTable+j;
1379
1380 c->microseconds = 0;
1381 c->calls = 0;
1382 }
1383 }
1384
1385 /* ========================== Redis OP Array API ============================ */
1386
1387 void redisOpArrayInit(redisOpArray *oa) {
1388 oa->ops = NULL;
1389 oa->numops = 0;
1390 }
1391
1392 int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid,
1393 robj **argv, int argc, int target)
1394 {
1395 redisOp *op;
1396
1397 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1));
1398 op = oa->ops+oa->numops;
1399 op->cmd = cmd;
1400 op->dbid = dbid;
1401 op->argv = argv;
1402 op->argc = argc;
1403 op->target = target;
1404 oa->numops++;
1405 return oa->numops;
1406 }
1407
1408 void redisOpArrayFree(redisOpArray *oa) {
1409 while(oa->numops) {
1410 int j;
1411 redisOp *op;
1412
1413 oa->numops--;
1414 op = oa->ops+oa->numops;
1415 for (j = 0; j < op->argc; j++)
1416 decrRefCount(op->argv[j]);
1417 zfree(op->argv);
1418 }
1419 zfree(oa->ops);
1420 }
1421
1422 /* ====================== Commands lookup and execution ===================== */
1423
1424 struct redisCommand *lookupCommand(sds name) {
1425 return dictFetchValue(server.commands, name);
1426 }
1427
1428 struct redisCommand *lookupCommandByCString(char *s) {
1429 struct redisCommand *cmd;
1430 sds name = sdsnew(s);
1431
1432 cmd = dictFetchValue(server.commands, name);
1433 sdsfree(name);
1434 return cmd;
1435 }
1436
1437 /* Propagate the specified command (in the context of the specified database id)
1438 * to AOF, Slaves and Monitors.
1439 *
1440 * flags are an xor between:
1441 * + REDIS_PROPAGATE_NONE (no propagation of command at all)
1442 * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
1443 * + REDIS_PROPAGATE_REPL (propagate into the replication link)
1444 */
1445 void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1446 int flags)
1447 {
1448 if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
1449 feedAppendOnlyFile(cmd,dbid,argv,argc);
1450 if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves))
1451 replicationFeedSlaves(server.slaves,dbid,argv,argc);
1452 }
1453
1454 /* Used inside commands to schedule the propagation of additional commands
1455 * after the current command is propagated to AOF / Replication. */
1456 void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
1457 int target)
1458 {
1459 redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
1460 }
1461
1462 /* Call() is the core of Redis execution of a command */
1463 void call(redisClient *c, int flags) {
1464 long long dirty, start = ustime(), duration;
1465
1466 /* Sent the command to clients in MONITOR mode, only if the commands are
1467 * not geneated from reading an AOF. */
1468 if (listLength(server.monitors) && !server.loading)
1469 replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
1470
1471 /* Call the command. */
1472 redisOpArrayInit(&server.also_propagate);
1473 dirty = server.dirty;
1474 c->cmd->proc(c);
1475 dirty = server.dirty-dirty;
1476 duration = ustime()-start;
1477
1478 /* When EVAL is called loading the AOF we don't want commands called
1479 * from Lua to go into the slowlog or to populate statistics. */
1480 if (server.loading && c->flags & REDIS_LUA_CLIENT)
1481 flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
1482
1483 /* Log the command into the Slow log if needed, and populate the
1484 * per-command statistics that we show in INFO commandstats. */
1485 if (flags & REDIS_CALL_SLOWLOG)
1486 slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
1487 if (flags & REDIS_CALL_STATS) {
1488 c->cmd->microseconds += duration;
1489 c->cmd->calls++;
1490 }
1491
1492 /* Propagate the command into the AOF and replication link */
1493 if (flags & REDIS_CALL_PROPAGATE) {
1494 int flags = REDIS_PROPAGATE_NONE;
1495
1496 if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION)
1497 flags |= REDIS_PROPAGATE_REPL;
1498 if (dirty)
1499 flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
1500 if (flags != REDIS_PROPAGATE_NONE)
1501 propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
1502 }
1503 /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional
1504 * PUSH command. */
1505 if (server.also_propagate.numops) {
1506 int j;
1507 redisOp *rop;
1508
1509 for (j = 0; j < server.also_propagate.numops; j++) {
1510 rop = &server.also_propagate.ops[j];
1511 propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
1512 }
1513 redisOpArrayFree(&server.also_propagate);
1514 }
1515 server.stat_numcommands++;
1516 }
1517
1518 /* If this function gets called we already read a whole
1519 * command, argments are in the client argv/argc fields.
1520 * processCommand() execute the command or prepare the
1521 * server for a bulk read from the client.
1522 *
1523 * If 1 is returned the client is still alive and valid and
1524 * and other operations can be performed by the caller. Otherwise
1525 * if 0 is returned the client was destroied (i.e. after QUIT). */
1526 int processCommand(redisClient *c) {
1527 /* The QUIT command is handled separately. Normal command procs will
1528 * go through checking for replication and QUIT will cause trouble
1529 * when FORCE_REPLICATION is enabled and would be implemented in
1530 * a regular command proc. */
1531 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1532 addReply(c,shared.ok);
1533 c->flags |= REDIS_CLOSE_AFTER_REPLY;
1534 return REDIS_ERR;
1535 }
1536
1537 /* Now lookup the command and check ASAP about trivial error conditions
1538 * such as wrong arity, bad command name and so forth. */
1539 c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
1540 if (!c->cmd) {
1541 addReplyErrorFormat(c,"unknown command '%s'",
1542 (char*)c->argv[0]->ptr);
1543 return REDIS_OK;
1544 } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
1545 (c->argc < -c->cmd->arity)) {
1546 addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
1547 c->cmd->name);
1548 return REDIS_OK;
1549 }
1550
1551 /* Check if the user is authenticated */
1552 if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
1553 {
1554 addReplyError(c,"operation not permitted");
1555 return REDIS_OK;
1556 }
1557
1558 /* Handle the maxmemory directive.
1559 *
1560 * First we try to free some memory if possible (if there are volatile
1561 * keys in the dataset). If there are not the only thing we can do
1562 * is returning an error. */
1563 if (server.maxmemory) {
1564 int retval = freeMemoryIfNeeded();
1565 if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
1566 addReply(c, shared.oomerr);
1567 return REDIS_OK;
1568 }
1569 }
1570
1571 /* Don't accept write commands if there are problems persisting on disk. */
1572 if (server.stop_writes_on_bgsave_err &&
1573 server.saveparamslen > 0
1574 && server.lastbgsave_status == REDIS_ERR &&
1575 c->cmd->flags & REDIS_CMD_WRITE)
1576 {
1577 addReply(c, shared.bgsaveerr);
1578 return REDIS_OK;
1579 }
1580
1581 /* Don't accept wirte commands if this is a read only slave. But
1582 * accept write commands if this is our master. */
1583 if (server.masterhost && server.repl_slave_ro &&
1584 !(c->flags & REDIS_MASTER) &&
1585 c->cmd->flags & REDIS_CMD_WRITE)
1586 {
1587 addReply(c, shared.roslaveerr);
1588 return REDIS_OK;
1589 }
1590
1591 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
1592 if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
1593 &&
1594 c->cmd->proc != subscribeCommand &&
1595 c->cmd->proc != unsubscribeCommand &&
1596 c->cmd->proc != psubscribeCommand &&
1597 c->cmd->proc != punsubscribeCommand) {
1598 addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
1599 return REDIS_OK;
1600 }
1601
1602 /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
1603 * we are a slave with a broken link with master. */
1604 if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
1605 server.repl_serve_stale_data == 0 &&
1606 c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
1607 {
1608 addReply(c, shared.masterdownerr);
1609 return REDIS_OK;
1610 }
1611
1612 /* Loading DB? Return an error if the command is not INFO */
1613 if (server.loading && c->cmd->proc != infoCommand) {
1614 addReply(c, shared.loadingerr);
1615 return REDIS_OK;
1616 }
1617
1618 /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. */
1619 if (server.lua_timedout &&
1620 !(c->cmd->proc == shutdownCommand &&
1621 c->argc == 2 &&
1622 tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
1623 !(c->cmd->proc == scriptCommand &&
1624 c->argc == 2 &&
1625 tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
1626 {
1627 addReply(c, shared.slowscripterr);
1628 return REDIS_OK;
1629 }
1630
1631 /* Exec the command */
1632 if (c->flags & REDIS_MULTI &&
1633 c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
1634 c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
1635 {
1636 queueMultiCommand(c);
1637 addReply(c,shared.queued);
1638 } else {
1639 call(c,REDIS_CALL_FULL);
1640 }
1641 return REDIS_OK;
1642 }
1643
1644 /*================================== Shutdown =============================== */
1645
1646 int prepareForShutdown(int flags) {
1647 int save = flags & REDIS_SHUTDOWN_SAVE;
1648 int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
1649
1650 redisLog(REDIS_WARNING,"User requested shutdown...");
1651 /* Kill the saving child if there is a background saving in progress.
1652 We want to avoid race conditions, for instance our saving child may
1653 overwrite the synchronous saving did by SHUTDOWN. */
1654 if (server.rdb_child_pid != -1) {
1655 redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
1656 kill(server.rdb_child_pid,SIGKILL);
1657 rdbRemoveTempFile(server.rdb_child_pid);
1658 }
1659 if (server.aof_state != REDIS_AOF_OFF) {
1660 /* Kill the AOF saving child as the AOF we already have may be longer
1661 * but contains the full dataset anyway. */
1662 if (server.aof_child_pid != -1) {
1663 redisLog(REDIS_WARNING,
1664 "There is a child rewriting the AOF. Killing it!");
1665 kill(server.aof_child_pid,SIGKILL);
1666 }
1667 /* Append only file: fsync() the AOF and exit */
1668 redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
1669 aof_fsync(server.aof_fd);
1670 }
1671 if ((server.saveparamslen > 0 && !nosave) || save) {
1672 redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
1673 /* Snapshotting. Perform a SYNC SAVE and exit */
1674 if (rdbSave(server.rdb_filename) != REDIS_OK) {
1675 /* Ooops.. error saving! The best we can do is to continue
1676 * operating. Note that if there was a background saving process,
1677 * in the next cron() Redis will be notified that the background
1678 * saving aborted, handling special stuff like slaves pending for
1679 * synchronization... */
1680 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
1681 return REDIS_ERR;
1682 }
1683 }
1684 if (server.daemonize) {
1685 redisLog(REDIS_NOTICE,"Removing the pid file.");
1686 unlink(server.pidfile);
1687 }
1688 /* Close the listening sockets. Apparently this allows faster restarts. */
1689 if (server.ipfd != -1) close(server.ipfd);
1690 if (server.sofd != -1) close(server.sofd);
1691 if (server.unixsocket) {
1692 redisLog(REDIS_NOTICE,"Removing the unix socket file.");
1693 unlink(server.unixsocket); /* don't care if this fails */
1694 }
1695
1696 redisLog(REDIS_WARNING,"Redis is now ready to exit, bye bye...");
1697 return REDIS_OK;
1698 }
1699
1700 /*================================== Commands =============================== */
1701
1702 /* Return zero if strings are the same, non-zero if they are not.
1703 * The comparison is performed in a way that prevents an attacker to obtain
1704 * information about the nature of the strings just monitoring the execution
1705 * time of the function.
1706 *
1707 * Note that limiting the comparison length to strings up to 512 bytes we
1708 * can avoid leaking any information about the password length and any
1709 * possible branch misprediction related leak.
1710 */
1711 int time_independent_strcmp(char *a, char *b) {
1712 char bufa[REDIS_AUTHPASS_MAX_LEN], bufb[REDIS_AUTHPASS_MAX_LEN];
1713 /* The above two strlen perform len(a) + len(b) operations where either
1714 * a or b are fixed (our password) length, and the difference is only
1715 * relative to the length of the user provided string, so no information
1716 * leak is possible in the following two lines of code. */
1717 int alen = strlen(a);
1718 int blen = strlen(b);
1719 int j;
1720 int diff = 0;
1721
1722 /* We can't compare strings longer than our static buffers.
1723 * Note that this will never pass the first test in practical circumstances
1724 * so there is no info leak. */
1725 if (alen > sizeof(bufa) || blen > sizeof(bufb)) return 1;
1726
1727 memset(bufa,0,sizeof(bufa)); /* Constant time. */
1728 memset(bufb,0,sizeof(bufb)); /* Constant time. */
1729 /* Again the time of the following two copies is proportional to
1730 * len(a) + len(b) so no info is leaked. */
1731 memcpy(bufa,a,alen);
1732 memcpy(bufb,b,blen);
1733
1734 /* Always compare all the chars in the two buffers without
1735 * conditional expressions. */
1736 for (j = 0; j < sizeof(bufa); j++) {
1737 diff |= (bufa[j] ^ bufb[j]);
1738 }
1739 /* Length must be equal as well. */
1740 diff |= alen ^ blen;
1741 return diff; /* If zero strings are the same. */
1742 }
1743
1744 void authCommand(redisClient *c) {
1745 if (!server.requirepass) {
1746 addReplyError(c,"Client sent AUTH, but no password is set");
1747 } else if (!time_independent_strcmp(c->argv[1]->ptr, server.requirepass)) {
1748 c->authenticated = 1;
1749 addReply(c,shared.ok);
1750 } else {
1751 c->authenticated = 0;
1752 addReplyError(c,"invalid password");
1753 }
1754 }
1755
1756 void pingCommand(redisClient *c) {
1757 addReply(c,shared.pong);
1758 }
1759
1760 void echoCommand(redisClient *c) {
1761 addReplyBulk(c,c->argv[1]);
1762 }
1763
1764 void timeCommand(redisClient *c) {
1765 struct timeval tv;
1766
1767 /* gettimeofday() can only fail if &tv is a bad addresss so we
1768 * don't check for errors. */
1769 gettimeofday(&tv,NULL);
1770 addReplyMultiBulkLen(c,2);
1771 addReplyBulkLongLong(c,tv.tv_sec);
1772 addReplyBulkLongLong(c,tv.tv_usec);
1773 }
1774
/* Convert an amount of bytes into a human readable string and store it,
 * NUL terminated, into 's'.
 *
 * Values below 1024 are printed as an integer followed by 'B' (for instance
 * "100B"). Bigger values are scaled by powers of 1024 and printed with two
 * decimal digits followed by the unit: "4.00K", "100.00M", "2.00G",
 * "1.50T", "3.00P". Values too big even for the 'P' unit fall back to the
 * plain byte count, so 's' is always written.
 *
 * 's' must have room for at least 22 bytes (the longest possible output is
 * a 20 digit number followed by 'B' and the terminator). */
void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024ULL*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024ULL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024ULL*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024ULL*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024ULL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* No unit left: print the raw byte count rather than leaving the
         * output buffer untouched. */
        sprintf(s,"%lluB",n);
    }
}
1795
1796 /* Create the string returned by the INFO command. This is decoupled
1797 * by the INFO command itself as we need to report the same information
1798 * on memory corruption problems. */
1799 sds genRedisInfoString(char *section) {
1800 sds info = sdsempty();
1801 time_t uptime = server.unixtime-server.stat_starttime;
1802 int j, numcommands;
1803 struct rusage self_ru, c_ru;
1804 unsigned long lol, bib;
1805 int allsections = 0, defsections = 0;
1806 int sections = 0;
1807
1808 if (section) {
1809 allsections = strcasecmp(section,"all") == 0;
1810 defsections = strcasecmp(section,"default") == 0;
1811 }
1812
1813 getrusage(RUSAGE_SELF, &self_ru);
1814 getrusage(RUSAGE_CHILDREN, &c_ru);
1815 getClientsMaxBuffers(&lol,&bib);
1816
1817 /* Server */
1818 if (allsections || defsections || !strcasecmp(section,"server")) {
1819 struct utsname name;
1820
1821 if (sections++) info = sdscat(info,"\r\n");
1822 uname(&name);
1823 info = sdscatprintf(info,
1824 "# Server\r\n"
1825 "redis_version:%s\r\n"
1826 "redis_git_sha1:%s\r\n"
1827 "redis_git_dirty:%d\r\n"
1828 "os:%s %s %s\r\n"
1829 "arch_bits:%d\r\n"
1830 "multiplexing_api:%s\r\n"
1831 "gcc_version:%d.%d.%d\r\n"
1832 "process_id:%ld\r\n"
1833 "run_id:%s\r\n"
1834 "tcp_port:%d\r\n"
1835 "uptime_in_seconds:%ld\r\n"
1836 "uptime_in_days:%ld\r\n"
1837 "lru_clock:%ld\r\n",
1838 REDIS_VERSION,
1839 redisGitSHA1(),
1840 strtol(redisGitDirty(),NULL,10) > 0,
1841 name.sysname, name.release, name.machine,
1842 server.arch_bits,
1843 aeGetApiName(),
1844 #ifdef __GNUC__
1845 __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
1846 #else
1847 0,0,0,
1848 #endif
1849 (long) getpid(),
1850 server.runid,
1851 server.port,
1852 uptime,
1853 uptime/(3600*24),
1854 (unsigned long) server.lruclock);
1855 }
1856
1857 /* Clients */
1858 if (allsections || defsections || !strcasecmp(section,"clients")) {
1859 if (sections++) info = sdscat(info,"\r\n");
1860 info = sdscatprintf(info,
1861 "# Clients\r\n"
1862 "connected_clients:%lu\r\n"
1863 "client_longest_output_list:%lu\r\n"
1864 "client_biggest_input_buf:%lu\r\n"
1865 "blocked_clients:%d\r\n",
1866 listLength(server.clients)-listLength(server.slaves),
1867 lol, bib,
1868 server.bpop_blocked_clients);
1869 }
1870
1871 /* Memory */
1872 if (allsections || defsections || !strcasecmp(section,"memory")) {
1873 char hmem[64];
1874 char peak_hmem[64];
1875
1876 bytesToHuman(hmem,zmalloc_used_memory());
1877 bytesToHuman(peak_hmem,server.stat_peak_memory);
1878 if (sections++) info = sdscat(info,"\r\n");
1879 info = sdscatprintf(info,
1880 "# Memory\r\n"
1881 "used_memory:%zu\r\n"
1882 "used_memory_human:%s\r\n"
1883 "used_memory_rss:%zu\r\n"
1884 "used_memory_peak:%zu\r\n"
1885 "used_memory_peak_human:%s\r\n"
1886 "used_memory_lua:%lld\r\n"
1887 "mem_fragmentation_ratio:%.2f\r\n"
1888 "mem_allocator:%s\r\n",
1889 zmalloc_used_memory(),
1890 hmem,
1891 zmalloc_get_rss(),
1892 server.stat_peak_memory,
1893 peak_hmem,
1894 ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
1895 zmalloc_get_fragmentation_ratio(),
1896 ZMALLOC_LIB
1897 );
1898 }
1899
1900 /* Persistence */
1901 if (allsections || defsections || !strcasecmp(section,"persistence")) {
1902 if (sections++) info = sdscat(info,"\r\n");
1903 info = sdscatprintf(info,
1904 "# Persistence\r\n"
1905 "loading:%d\r\n"
1906 "rdb_changes_since_last_save:%lld\r\n"
1907 "rdb_bgsave_in_progress:%d\r\n"
1908 "rdb_last_save_time:%ld\r\n"
1909 "rdb_last_bgsave_status:%s\r\n"
1910 "rdb_last_bgsave_time_sec:%ld\r\n"
1911 "rdb_current_bgsave_time_sec:%ld\r\n"
1912 "aof_enabled:%d\r\n"
1913 "aof_rewrite_in_progress:%d\r\n"
1914 "aof_rewrite_scheduled:%d\r\n"
1915 "aof_last_rewrite_time_sec:%ld\r\n"
1916 "aof_current_rewrite_time_sec:%ld\r\n",
1917 server.loading,
1918 server.dirty,
1919 server.rdb_child_pid != -1,
1920 server.lastsave,
1921 server.lastbgsave_status == REDIS_OK ? "ok" : "err",
1922 server.rdb_save_time_last,
1923 (server.rdb_child_pid == -1) ?
1924 -1 : time(NULL)-server.rdb_save_time_start,
1925 server.aof_state != REDIS_AOF_OFF,
1926 server.aof_child_pid != -1,
1927 server.aof_rewrite_scheduled,
1928 server.aof_rewrite_time_last,
1929 (server.aof_child_pid == -1) ?
1930 -1 : time(NULL)-server.aof_rewrite_time_start);
1931
1932 if (server.aof_state != REDIS_AOF_OFF) {
1933 info = sdscatprintf(info,
1934 "aof_current_size:%lld\r\n"
1935 "aof_base_size:%lld\r\n"
1936 "aof_pending_rewrite:%d\r\n"
1937 "aof_buffer_length:%zu\r\n"
1938 "aof_rewrite_buffer_length:%zu\r\n"
1939 "aof_pending_bio_fsync:%llu\r\n"
1940 "aof_delayed_fsync:%lu\r\n",
1941 (long long) server.aof_current_size,
1942 (long long) server.aof_rewrite_base_size,
1943 server.aof_rewrite_scheduled,
1944 sdslen(server.aof_buf),
1945 aofRewriteBufferSize(),
1946 bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
1947 server.aof_delayed_fsync);
1948 }
1949
1950 if (server.loading) {
1951 double perc;
1952 time_t eta, elapsed;
1953 off_t remaining_bytes = server.loading_total_bytes-
1954 server.loading_loaded_bytes;
1955
1956 perc = ((double)server.loading_loaded_bytes /
1957 server.loading_total_bytes) * 100;
1958
1959 elapsed = server.unixtime-server.loading_start_time;
1960 if (elapsed == 0) {
1961 eta = 1; /* A fake 1 second figure if we don't have
1962 enough info */
1963 } else {
1964 eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
1965 }
1966
1967 info = sdscatprintf(info,
1968 "loading_start_time:%ld\r\n"
1969 "loading_total_bytes:%llu\r\n"
1970 "loading_loaded_bytes:%llu\r\n"
1971 "loading_loaded_perc:%.2f\r\n"
1972 "loading_eta_seconds:%ld\r\n"
1973 ,(unsigned long) server.loading_start_time,
1974 (unsigned long long) server.loading_total_bytes,
1975 (unsigned long long) server.loading_loaded_bytes,
1976 perc,
1977 eta
1978 );
1979 }
1980 }
1981
1982 /* Stats */
1983 if (allsections || defsections || !strcasecmp(section,"stats")) {
1984 if (sections++) info = sdscat(info,"\r\n");
1985 info = sdscatprintf(info,
1986 "# Stats\r\n"
1987 "total_connections_received:%lld\r\n"
1988 "total_commands_processed:%lld\r\n"
1989 "instantaneous_ops_per_sec:%lld\r\n"
1990 "rejected_connections:%lld\r\n"
1991 "expired_keys:%lld\r\n"
1992 "evicted_keys:%lld\r\n"
1993 "keyspace_hits:%lld\r\n"
1994 "keyspace_misses:%lld\r\n"
1995 "pubsub_channels:%ld\r\n"
1996 "pubsub_patterns:%lu\r\n"
1997 "latest_fork_usec:%lld\r\n",
1998 server.stat_numconnections,
1999 server.stat_numcommands,
2000 getOperationsPerSecond(),
2001 server.stat_rejected_conn,
2002 server.stat_expiredkeys,
2003 server.stat_evictedkeys,
2004 server.stat_keyspace_hits,
2005 server.stat_keyspace_misses,
2006 dictSize(server.pubsub_channels),
2007 listLength(server.pubsub_patterns),
2008 server.stat_fork_time);
2009 }
2010
2011 /* Replication */
2012 if (allsections || defsections || !strcasecmp(section,"replication")) {
2013 if (sections++) info = sdscat(info,"\r\n");
2014 info = sdscatprintf(info,
2015 "# Replication\r\n"
2016 "role:%s\r\n",
2017 server.masterhost == NULL ? "master" : "slave");
2018 if (server.masterhost) {
2019 info = sdscatprintf(info,
2020 "master_host:%s\r\n"
2021 "master_port:%d\r\n"
2022 "master_link_status:%s\r\n"
2023 "master_last_io_seconds_ago:%d\r\n"
2024 "master_sync_in_progress:%d\r\n"
2025 ,server.masterhost,
2026 server.masterport,
2027 (server.repl_state == REDIS_REPL_CONNECTED) ?
2028 "up" : "down",
2029 server.master ?
2030 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
2031 server.repl_state == REDIS_REPL_TRANSFER
2032 );
2033
2034 if (server.repl_state == REDIS_REPL_TRANSFER) {
2035 info = sdscatprintf(info,
2036 "master_sync_left_bytes:%ld\r\n"
2037 "master_sync_last_io_seconds_ago:%d\r\n"
2038 ,(long)server.repl_transfer_left,
2039 (int)(server.unixtime-server.repl_transfer_lastio)
2040 );
2041 }
2042
2043 if (server.repl_state != REDIS_REPL_CONNECTED) {
2044 info = sdscatprintf(info,
2045 "master_link_down_since_seconds:%ld\r\n",
2046 (long)server.unixtime-server.repl_down_since);
2047 }
2048 }
2049 info = sdscatprintf(info,
2050 "connected_slaves:%lu\r\n",
2051 listLength(server.slaves));
2052 if (listLength(server.slaves)) {
2053 int slaveid = 0;
2054 listNode *ln;
2055 listIter li;
2056
2057 listRewind(server.slaves,&li);
2058 while((ln = listNext(&li))) {
2059 redisClient *slave = listNodeValue(ln);
2060 char *state = NULL;
2061 char ip[32];
2062 int port;
2063
2064 if (anetPeerToString(slave->fd,ip,&port) == -1) continue;
2065 switch(slave->replstate) {
2066 case REDIS_REPL_WAIT_BGSAVE_START:
2067 case REDIS_REPL_WAIT_BGSAVE_END:
2068 state = "wait_bgsave";
2069 break;
2070 case REDIS_REPL_SEND_BULK:
2071 state = "send_bulk";
2072 break;
2073 case REDIS_REPL_ONLINE:
2074 state = "online";
2075 break;
2076 }
2077 if (state == NULL) continue;
2078 info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n",
2079 slaveid,ip,slave->slave_listening_port,state);
2080 slaveid++;
2081 }
2082 }
2083 }
2084
2085 /* CPU */
2086 if (allsections || defsections || !strcasecmp(section,"cpu")) {
2087 if (sections++) info = sdscat(info,"\r\n");
2088 info = sdscatprintf(info,
2089 "# CPU\r\n"
2090 "used_cpu_sys:%.2f\r\n"
2091 "used_cpu_user:%.2f\r\n"
2092 "used_cpu_sys_children:%.2f\r\n"
2093 "used_cpu_user_children:%.2f\r\n",
2094 (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
2095 (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
2096 (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
2097 (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
2098 }
2099
2100 /* cmdtime */
2101 if (allsections || !strcasecmp(section,"commandstats")) {
2102 if (sections++) info = sdscat(info,"\r\n");
2103 info = sdscatprintf(info, "# Commandstats\r\n");
2104 numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
2105 for (j = 0; j < numcommands; j++) {
2106 struct redisCommand *c = redisCommandTable+j;
2107
2108 if (!c->calls) continue;
2109 info = sdscatprintf(info,
2110 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
2111 c->name, c->calls, c->microseconds,
2112 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
2113 }
2114 }
2115
2116 /* Key space */
2117 if (allsections || defsections || !strcasecmp(section,"keyspace")) {
2118 if (sections++) info = sdscat(info,"\r\n");
2119 info = sdscatprintf(info, "# Keyspace\r\n");
2120 for (j = 0; j < server.dbnum; j++) {
2121 long long keys, vkeys;
2122
2123 keys = dictSize(server.db[j].dict);
2124 vkeys = dictSize(server.db[j].expires);
2125 if (keys || vkeys) {
2126 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
2127 j, keys, vkeys);
2128 }
2129 }
2130 }
2131 return info;
2132 }
2133
2134 void infoCommand(redisClient *c) {
2135 char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
2136
2137 if (c->argc > 2) {
2138 addReply(c,shared.syntaxerr);
2139 return;
2140 }
2141 sds info = genRedisInfoString(section);
2142 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
2143 (unsigned long)sdslen(info)));
2144 addReplySds(c,info);
2145 addReply(c,shared.crlf);
2146 }
2147
2148 void monitorCommand(redisClient *c) {
2149 /* ignore MONITOR if aleady slave or in monitor mode */
2150 if (c->flags & REDIS_SLAVE) return;
2151
2152 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
2153 c->slaveseldb = 0;
2154 listAddNodeTail(server.monitors,c);
2155 addReply(c,shared.ok);
2156 }
2157
2158 /* ============================ Maxmemory directive ======================== */
2159
2160 /* This function gets called when 'maxmemory' is set on the config file to limit
2161 * the max memory used by the server, before processing a command.
2162 *
2163 * The goal of the function is to free enough memory to keep Redis under the
2164 * configured memory limit.
2165 *
2166 * The function starts calculating how many bytes should be freed to keep
2167 * Redis under the limit, and enters a loop selecting the best keys to
2168 * evict accordingly to the configured policy.
2169 *
2170 * If all the bytes needed to return back under the limit were freed the
2171 * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
2172 * should block the execution of commands that will result in more memory
2173 * used by the server.
2174 */
2175 int freeMemoryIfNeeded(void) {
2176 size_t mem_used, mem_tofree, mem_freed;
2177 int slaves = listLength(server.slaves);
2178
2179 /* Remove the size of slaves output buffers and AOF buffer from the
2180 * count of used memory. */
2181 mem_used = zmalloc_used_memory();
2182 if (slaves) {
2183 listIter li;
2184 listNode *ln;
2185
2186 listRewind(server.slaves,&li);
2187 while((ln = listNext(&li))) {
2188 redisClient *slave = listNodeValue(ln);
2189 unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
2190 if (obuf_bytes > mem_used)
2191 mem_used = 0;
2192 else
2193 mem_used -= obuf_bytes;
2194 }
2195 }
2196 if (server.aof_state != REDIS_AOF_OFF) {
2197 mem_used -= sdslen(server.aof_buf);
2198 mem_used -= aofRewriteBufferSize();
2199 }
2200
2201 /* Check if we are over the memory limit. */
2202 if (mem_used <= server.maxmemory) return REDIS_OK;
2203
2204 if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
2205 return REDIS_ERR; /* We need to free memory, but policy forbids. */
2206
2207 /* Compute how much memory we need to free. */
2208 mem_tofree = mem_used - server.maxmemory;
2209 mem_freed = 0;
2210 while (mem_freed < mem_tofree) {
2211 int j, k, keys_freed = 0;
2212
2213 for (j = 0; j < server.dbnum; j++) {
2214 long bestval = 0; /* just to prevent warning */
2215 sds bestkey = NULL;
2216 struct dictEntry *de;
2217 redisDb *db = server.db+j;
2218 dict *dict;
2219
2220 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2221 server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
2222 {
2223 dict = server.db[j].dict;
2224 } else {
2225 dict = server.db[j].expires;
2226 }
2227 if (dictSize(dict) == 0) continue;
2228
2229 /* volatile-random and allkeys-random policy */
2230 if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
2231 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
2232 {
2233 de = dictGetRandomKey(dict);
2234 bestkey = dictGetKey(de);
2235 }
2236
2237 /* volatile-lru and allkeys-lru policy */
2238 else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
2239 server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2240 {
2241 for (k = 0; k < server.maxmemory_samples; k++) {
2242 sds thiskey;
2243 long thisval;
2244 robj *o;
2245
2246 de = dictGetRandomKey(dict);
2247 thiskey = dictGetKey(de);
2248 /* When policy is volatile-lru we need an additonal lookup
2249 * to locate the real key, as dict is set to db->expires. */
2250 if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
2251 de = dictFind(db->dict, thiskey);
2252 o = dictGetVal(de);
2253 thisval = estimateObjectIdleTime(o);
2254
2255 /* Higher idle time is better candidate for deletion */
2256 if (bestkey == NULL || thisval > bestval) {
2257 bestkey = thiskey;
2258 bestval = thisval;
2259 }
2260 }
2261 }
2262
2263 /* volatile-ttl */
2264 else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
2265 for (k = 0; k < server.maxmemory_samples; k++) {
2266 sds thiskey;
2267 long thisval;
2268
2269 de = dictGetRandomKey(dict);
2270 thiskey = dictGetKey(de);
2271 thisval = (long) dictGetVal(de);
2272
2273 /* Expire sooner (minor expire unix timestamp) is better
2274 * candidate for deletion */
2275 if (bestkey == NULL || thisval < bestval) {
2276 bestkey = thiskey;
2277 bestval = thisval;
2278 }
2279 }
2280 }
2281
2282 /* Finally remove the selected key. */
2283 if (bestkey) {
2284 long long delta;
2285
2286 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
2287 propagateExpire(db,keyobj);
2288 /* We compute the amount of memory freed by dbDelete() alone.
2289 * It is possible that actually the memory needed to propagate
2290 * the DEL in AOF and replication link is greater than the one
2291 * we are freeing removing the key, but we can't account for
2292 * that otherwise we would never exit the loop.
2293 *
2294 * AOF and Output buffer memory will be freed eventually so
2295 * we only care about memory used by the key space. */
2296 delta = (long long) zmalloc_used_memory();
2297 dbDelete(db,keyobj);
2298 delta -= (long long) zmalloc_used_memory();
2299 mem_freed += delta;
2300 server.stat_evictedkeys++;
2301 decrRefCount(keyobj);
2302 keys_freed++;
2303
2304 /* When the memory to free starts to be big enough, we may
2305 * start spending so much time here that is impossible to
2306 * deliver data to the slaves fast enough, so we force the
2307 * transmission here inside the loop. */
2308 if (slaves) flushSlavesOutputBuffers();
2309 }
2310 }
2311 if (!keys_freed) return REDIS_ERR; /* nothing to free... */
2312 }
2313 return REDIS_OK;
2314 }
2315
2316 /* =================================== Main! ================================ */
2317
2318 #ifdef __linux__
/* Read the integer stored in /proc/sys/vm/overcommit_memory.
 * Returns the parsed value, or -1 when the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,sizeof(line),fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
2332
2333 void linuxOvercommitMemoryWarning(void) {
2334 if (linuxOvercommitMemoryValue() == 0) {
2335 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
2336 }
2337 }
2338 #endif /* __linux__ */
2339
2340 void createPidFile(void) {
2341 /* Try to write the pid file in a best-effort way. */
2342 FILE *fp = fopen(server.pidfile,"w");
2343 if (fp) {
2344 fprintf(fp,"%d\n",(int)getpid());
2345 fclose(fp);
2346 }
2347 }
2348
2349 void daemonize(void) {
2350 int fd;
2351
2352 if (fork() != 0) exit(0); /* parent exits */
2353 setsid(); /* create a new session */
2354
2355 /* Every output goes to /dev/null. If Redis is daemonized but
2356 * the 'logfile' is set to 'stdout' in the configuration file
2357 * it will not log at all. */
2358 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
2359 dup2(fd, STDIN_FILENO);
2360 dup2(fd, STDOUT_FILENO);
2361 dup2(fd, STDERR_FILENO);
2362 if (fd > STDERR_FILENO) close(fd);
2363 }
2364 }
2365
2366 void version() {
2367 printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d\n",
2368 REDIS_VERSION,
2369 redisGitSHA1(),
2370 atoi(redisGitDirty()) > 0,
2371 ZMALLOC_LIB,
2372 sizeof(long) == 4 ? 32 : 64);
2373 exit(0);
2374 }
2375
/* Print the command line help on the standard error, then terminate the
 * process with exit status 1. */
void usage() {
    static const char *lines[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n"
    };
    size_t i;

    /* None of the lines contains a format specifier, so they can be
     * written verbatim. */
    for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
        fputs(lines[i],stderr);
    exit(1);
}
2390
2391 void redisAsciiArt(void) {
2392 #include "asciilogo.h"
2393 char *buf = zmalloc(1024*16);
2394
2395 snprintf(buf,1024*16,ascii_logo,
2396 REDIS_VERSION,
2397 redisGitSHA1(),
2398 strtol(redisGitDirty(),NULL,10) > 0,
2399 (sizeof(long) == 8) ? "64" : "32",
2400 "stand alone",
2401 server.port,
2402 (long) getpid()
2403 );
2404 redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
2405 zfree(buf);
2406 }
2407
2408 static void sigtermHandler(int sig) {
2409 REDIS_NOTUSED(sig);
2410
2411 redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown...");
2412 server.shutdown_asap = 1;
2413 }
2414
2415 void setupSignalHandlers(void) {
2416 struct sigaction act;
2417
2418 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
2419 * Otherwise, sa_handler is used. */
2420 sigemptyset(&act.sa_mask);
2421 act.sa_flags = 0;
2422 act.sa_handler = sigtermHandler;
2423 sigaction(SIGTERM, &act, NULL);
2424
2425 #ifdef HAVE_BACKTRACE
2426 sigemptyset(&act.sa_mask);
2427 act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
2428 act.sa_sigaction = sigsegvHandler;
2429 sigaction(SIGSEGV, &act, NULL);
2430 sigaction(SIGBUS, &act, NULL);
2431 sigaction(SIGFPE, &act, NULL);
2432 sigaction(SIGILL, &act, NULL);
2433 #endif
2434 return;
2435 }
2436
2437 void memtest(size_t megabytes, int passes);
2438
2439 int main(int argc, char **argv) {
2440 long long start;
2441 struct timeval tv;
2442
2443 /* We need to initialize our libraries, and the server configuration. */
2444 zmalloc_enable_thread_safeness();
2445 srand(time(NULL)^getpid());
2446 gettimeofday(&tv,NULL);
2447 dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
2448 initServerConfig();
2449
2450 if (argc >= 2) {
2451 int j = 1; /* First option to parse in argv[] */
2452 sds options = sdsempty();
2453 char *configfile = NULL;
2454
2455 /* Handle special options --help and --version */
2456 if (strcmp(argv[1], "-v") == 0 ||
2457 strcmp(argv[1], "--version") == 0) version();
2458 if (strcmp(argv[1], "--help") == 0 ||
2459 strcmp(argv[1], "-h") == 0) usage();
2460 if (strcmp(argv[1], "--test-memory") == 0) {
2461 if (argc == 3) {
2462 memtest(atoi(argv[2]),50);
2463 exit(0);
2464 } else {
2465 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
2466 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
2467 exit(1);
2468 }
2469 }
2470
2471 /* First argument is the config file name? */
2472 if (argv[j][0] != '-' || argv[j][1] != '-')
2473 configfile = argv[j++];
2474 /* All the other options are parsed and conceptually appended to the
2475 * configuration file. For instance --port 6380 will generate the
2476 * string "port 6380\n" to be parsed after the actual file name
2477 * is parsed, if any. */
2478 while(j != argc) {
2479 if (argv[j][0] == '-' && argv[j][1] == '-') {
2480 /* Option name */
2481 if (sdslen(options)) options = sdscat(options,"\n");
2482 options = sdscat(options,argv[j]+2);
2483 options = sdscat(options," ");
2484 } else {
2485 /* Option argument */
2486 options = sdscatrepr(options,argv[j],strlen(argv[j]));
2487 options = sdscat(options," ");
2488 }
2489 j++;
2490 }
2491 resetServerSaveParams();
2492 loadServerConfig(configfile,options);
2493 sdsfree(options);
2494 } else {
2495 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
2496 }
2497 if (server.daemonize) daemonize();
2498 initServer();
2499 if (server.daemonize) createPidFile();
2500 redisAsciiArt();
2501 redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
2502 #ifdef __linux__
2503 linuxOvercommitMemoryWarning();
2504 #endif
2505 start = ustime();
2506 if (server.aof_state == REDIS_AOF_ON) {
2507 if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
2508 redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
2509 } else {
2510 if (rdbLoad(server.rdb_filename) == REDIS_OK) {
2511 redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
2512 (float)(ustime()-start)/1000000);
2513 } else if (errno != ENOENT) {
2514 redisLog(REDIS_WARNING,"Fatal error loading the DB. Exiting.");
2515 exit(1);
2516 }
2517 }
2518 if (server.ipfd > 0)
2519 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
2520 if (server.sofd > 0)
2521 redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
2522 aeSetBeforeSleepProc(server.el,beforeSleep);
2523 aeMain(server.el);
2524 aeDeleteEventLoop(server.el);
2525 return 0;
2526 }
2527
2528 /* The End */