* results. For instance SPOP and RANDOMKEY are two random commands.
* S: Sort command output array if called from script, so that the output
* is deterministic.
+ * l: Allow command while loading the database.
+ * t: Allow command while a slave has stale data but is not allowed to
+ * serve this data. Normally no command is accepted in this condition,
+ * except for a few.
*/
struct redisCommand redisCommandTable[] = {
{"get",getCommand,2,"r",0,NULL,1,1,1,0,0},
{"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
{"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
- {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
+ {"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
{"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"exec",execCommand,1,"s",0,NULL,0,0,0,0,0},
{"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0},
{"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
+ {"replconf",replconfCommand,-1,"ars",0,NULL,0,0,0,0,0},
{"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
{"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
- {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0},
- {"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0},
+ {"sort",sortCommand,-2,"wm",0,NULL,1,1,1,0,0},
+ {"info",infoCommand,-1,"rlt",0,NULL,0,0,0,0,0},
{"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
{"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0},
{"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0},
{"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0},
- {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0},
+ {"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0},
{"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
{"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0},
- {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
- {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
- {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0},
- {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0},
- {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0},
+ {"subscribe",subscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
+ {"unsubscribe",unsubscribeCommand,-1,"rpslt",0,NULL,0,0,0,0,0},
+ {"psubscribe",psubscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
+ {"punsubscribe",punsubscribeCommand,-1,"rpslt",0,NULL,0,0,0,0,0},
+ {"publish",publishCommand,3,"pflt",0,NULL,0,0,0,0,0},
{"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0},
{"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0},
{"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0},
{"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0},
{"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
{"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
- {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0}
+ {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0},
+ {"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
+ {"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0}
};
/*============================ Utility functions ============================ */
if (server.logfile) close(fd);
}
-/* Redis generally does not try to recover from out of memory conditions
- * when allocating objects or strings, it is not clear if it will be possible
- * to report this condition to the client since the networking layer itself
- * is based on heap allocation for send buffers, so we simply abort.
- * At least the code will be simpler to read... */
-void oom(const char *msg) {
- redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
- sleep(1);
- abort();
-}
-
/* Return the UNIX time in microseconds */
long long ustime(void) {
struct timeval tv;
* it will get more aggressive to avoid that too much memory is used by
* keys that can be removed from the keyspace. */
void activeExpireCycle(void) {
- int j;
+ int j, iteration = 0;
long long start = ustime(), timelimit;
/* We can use at max REDIS_EXPIRELOOKUPS_TIME_PERC percentage of CPU time
if (timelimit <= 0) timelimit = 1;
for (j = 0; j < server.dbnum; j++) {
- int expired, iteration = 0;
+ int expired;
redisDb *db = server.db+j;
/* Continue to expire if at the end of the cycle more than 25%
if (num && slots > DICT_HT_INITIAL_SIZE &&
(num*100/slots < 1)) break;
+ /* The main collection cycle. Sample random keys among keys
+ * with an expire set, checking for expired ones. */
expired = 0;
if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
num = REDIS_EXPIRELOOKUPS_PER_CRON;
* expire. So after a given amount of milliseconds return to the
* caller waiting for the other active expire cycle. */
iteration++;
- if ((iteration & 0xff) == 0 && /* Check once every 255 iterations */
+ if ((iteration & 0xf) == 0 && /* check once every 16 cycles. */
(ustime()-start) > timelimit) return;
} while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
}
shared.del = createStringObject("DEL",3);
shared.rpop = createStringObject("RPOP",4);
shared.lpop = createStringObject("LPOP",4);
+ shared.lpush = createStringObject("LPUSH",5);
for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
shared.integers[j]->encoding = REDIS_ENCODING_INT;
server.aof_rewrite_base_size = 0;
server.aof_rewrite_scheduled = 0;
server.aof_last_fsync = time(NULL);
+ server.aof_rewrite_time_last = -1;
+ server.aof_rewrite_time_start = -1;
+ server.aof_lastbgrewrite_status = REDIS_OK;
server.aof_delayed_fsync = 0;
server.aof_fd = -1;
server.aof_selected_db = -1; /* Make sure the first time will not match */
server.repl_serve_stale_data = 1;
server.repl_slave_ro = 1;
server.repl_down_since = time(NULL);
+ server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY;
/* Client output buffer limits */
server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0;
server.delCommand = lookupCommandByCString("del");
server.multiCommand = lookupCommandByCString("multi");
server.lpushCommand = lookupCommandByCString("lpush");
+ server.lpopCommand = lookupCommandByCString("lpop");
+ server.rpopCommand = lookupCommandByCString("rpop");
/* Slow log */
server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
server.slaves = listCreate();
server.monitors = listCreate();
server.unblocked_clients = listCreate();
+ server.ready_keys = listCreate();
createSharedObjects();
adjustOpenFilesLimit();
server.db[j].dict = dictCreate(&dbDictType,NULL);
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
+ server.db[j].ready_keys = dictCreate(&setDictType,NULL);
server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
server.db[j].id = j;
}
server.cronloops = 0;
server.rdb_child_pid = -1;
server.aof_child_pid = -1;
- server.aof_rewrite_buf = sdsempty();
+ aofRewriteBufferReset();
server.aof_buf = sdsempty();
server.lastsave = time(NULL);
+ server.rdb_save_time_last = -1;
+ server.rdb_save_time_start = -1;
server.dirty = 0;
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stop_writes_on_bgsave_err = 1;
aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE,
- acceptTcpHandler,NULL) == AE_ERR) oom("creating file event");
+ acceptTcpHandler,NULL) == AE_ERR) redisPanic("Unrecoverable error creating server.ipfd file event.");
if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
- acceptUnixHandler,NULL) == AE_ERR) oom("creating file event");
+ acceptUnixHandler,NULL) == AE_ERR) redisPanic("Unrecoverable error creating server.sofd file event.");
if (server.aof_state == REDIS_AOF_ON) {
server.aof_fd = open(server.aof_filename,
case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
case 'R': c->flags |= REDIS_CMD_RANDOM; break;
case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
+ case 'l': c->flags |= REDIS_CMD_LOADING; break;
+ case 't': c->flags |= REDIS_CMD_STALE; break;
default: redisPanic("Unsupported command flag"); break;
}
f++;
return REDIS_OK;
}
- /* Don't accept wirte commands if this is a read only slave. But
+ /* Don't accept write commands if this is a read only slave. But
* accept write commands if this is our master. */
if (server.masterhost && server.repl_slave_ro &&
!(c->flags & REDIS_MASTER) &&
* we are a slave with a broken link with master. */
if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
server.repl_serve_stale_data == 0 &&
- c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
+ !(c->cmd->flags & REDIS_CMD_STALE))
{
addReply(c, shared.masterdownerr);
return REDIS_OK;
}
- /* Loading DB? Return an error if the command is not INFO */
- if (server.loading && c->cmd->proc != infoCommand) {
+ /* Loading DB? Return an error if the command does not have the
+ * REDIS_CMD_LOADING flag. */
+ if (server.loading && !(c->cmd->flags & REDIS_CMD_LOADING)) {
addReply(c, shared.loadingerr);
return REDIS_OK;
}
- /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. */
+ /* Lua script too slow? Only a few commands are allowed, such as
+ * SHUTDOWN NOSAVE and SCRIPT KILL. */
if (server.lua_timedout &&
!(c->cmd->proc == shutdownCommand &&
c->argc == 2 &&
addReply(c,shared.queued);
} else {
call(c,REDIS_CALL_FULL);
+ if (listLength(server.ready_keys))
+ handleClientsBlockedOnLists();
}
return REDIS_OK;
}
/*================================== Commands =============================== */
+/* Return zero if strings are the same, non-zero if they are not.
+ * The comparison is performed in a way that prevents an attacker from
+ * obtaining information about the nature of the strings just by monitoring
+ * the execution time of the function.
+ *
+ * Both arguments must be NUL-terminated strings. A non-zero value is
+ * returned whenever either string is longer than REDIS_AUTHPASS_MAX_LEN
+ * bytes, since such strings do not fit in the static comparison buffers.
+ *
+ * Note that by limiting the comparison length to the size of the static
+ * buffers we can avoid leaking any information about the password length
+ * and any possible branch misprediction related leak.
+ */
+int time_independent_strcmp(char *a, char *b) {
+    char bufa[REDIS_AUTHPASS_MAX_LEN], bufb[REDIS_AUTHPASS_MAX_LEN];
+    /* The following two strlen() calls perform len(a) + len(b) operations
+     * where either a or b is of fixed (our password) length, and the
+     * difference is only relative to the length of the user provided
+     * string, so no information leak is possible in the following two
+     * lines of code.
+     *
+     * Lengths are kept as size_t: storing the strlen() result in an int
+     * would truncate very long inputs and mix signed and unsigned operands
+     * in the comparisons against sizeof() below. */
+    size_t alen = strlen(a);
+    size_t blen = strlen(b);
+    size_t j;
+    int diff = 0;
+
+    /* We can't compare strings longer than our static buffers.
+     * Note that this will never pass the first test in practical circumstances
+     * so there is no info leak. */
+    if (alen > sizeof(bufa) || blen > sizeof(bufb)) return 1;
+
+    memset(bufa,0,sizeof(bufa)); /* Constant time. */
+    memset(bufb,0,sizeof(bufb)); /* Constant time. */
+    /* Again the time of the following two copies is proportional to
+     * len(a) + len(b) so no info is leaked. */
+    memcpy(bufa,a,alen);
+    memcpy(bufb,b,blen);
+
+    /* Always compare all the chars in the two buffers without
+     * conditional expressions. */
+    for (j = 0; j < sizeof(bufa); j++) {
+        diff |= (bufa[j] ^ bufb[j]);
+    }
+    /* Length must be equal as well. Both lengths are bounded by the buffer
+     * size at this point, so narrowing the XOR to int cannot drop a set bit
+     * (assuming the buffer size itself fits in an int). */
+    diff |= (int)(alen ^ blen);
+    return diff; /* If zero strings are the same. */
+}
+
void authCommand(redisClient *c) {
if (!server.requirepass) {
addReplyError(c,"Client sent AUTH, but no password is set");
- } else if (!strcmp(c->argv[1]->ptr, server.requirepass)) {
+ } else if (!time_independent_strcmp(c->argv[1]->ptr, server.requirepass)) {
c->authenticated = 1;
addReply(c,shared.ok);
} else {
info = sdscatprintf(info,
"# Persistence\r\n"
"loading:%d\r\n"
+ "rdb_changes_since_last_save:%lld\r\n"
+ "rdb_bgsave_in_progress:%d\r\n"
+ "rdb_last_save_time:%ld\r\n"
+ "rdb_last_bgsave_status:%s\r\n"
+ "rdb_last_bgsave_time_sec:%ld\r\n"
+ "rdb_current_bgsave_time_sec:%ld\r\n"
"aof_enabled:%d\r\n"
- "changes_since_last_save:%lld\r\n"
- "bgsave_in_progress:%d\r\n"
- "last_save_time:%ld\r\n"
- "last_bgsave_status:%s\r\n"
- "bgrewriteaof_in_progress:%d\r\n"
- "bgrewriteaof_scheduled:%d\r\n",
+ "aof_rewrite_in_progress:%d\r\n"
+ "aof_rewrite_scheduled:%d\r\n"
+ "aof_last_rewrite_time_sec:%ld\r\n"
+ "aof_current_rewrite_time_sec:%ld\r\n"
+ "aof_last_bgrewrite_status:%s\r\n",
server.loading,
- server.aof_state != REDIS_AOF_OFF,
server.dirty,
server.rdb_child_pid != -1,
server.lastsave,
- server.lastbgsave_status == REDIS_OK ? "ok" : "err",
+ (server.lastbgsave_status == REDIS_OK) ? "ok" : "err",
+ server.rdb_save_time_last,
+ (server.rdb_child_pid == -1) ?
+ -1 : time(NULL)-server.rdb_save_time_start,
+ server.aof_state != REDIS_AOF_OFF,
server.aof_child_pid != -1,
- server.aof_rewrite_scheduled);
+ server.aof_rewrite_scheduled,
+ server.aof_rewrite_time_last,
+ (server.aof_child_pid == -1) ?
+ -1 : time(NULL)-server.aof_rewrite_time_start,
+ (server.aof_lastbgrewrite_status == REDIS_OK) ? "ok" : "err");
if (server.aof_state != REDIS_AOF_OFF) {
info = sdscatprintf(info,
"aof_base_size:%lld\r\n"
"aof_pending_rewrite:%d\r\n"
"aof_buffer_length:%zu\r\n"
+ "aof_rewrite_buffer_length:%lu\r\n"
"aof_pending_bio_fsync:%llu\r\n"
"aof_delayed_fsync:%lu\r\n",
(long long) server.aof_current_size,
(long long) server.aof_rewrite_base_size,
server.aof_rewrite_scheduled,
sdslen(server.aof_buf),
+ aofRewriteBufferSize(),
bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
server.aof_delayed_fsync);
}
if (server.repl_state == REDIS_REPL_TRANSFER) {
info = sdscatprintf(info,
- "master_sync_left_bytes:%ld\r\n"
+ "master_sync_left_bytes:%lld\r\n"
"master_sync_last_io_seconds_ago:%d\r\n"
- ,(long)server.repl_transfer_left,
+ , (long long)
+ (server.repl_transfer_size - server.repl_transfer_read),
(int)(server.unixtime-server.repl_transfer_lastio)
);
}
"master_link_down_since_seconds:%ld\r\n",
(long)server.unixtime-server.repl_down_since);
}
+ info = sdscatprintf(info,
+ "slave_priority:%d\r\n", server.slave_priority);
}
info = sdscatprintf(info,
"connected_slaves:%lu\r\n",
}
if (state == NULL) continue;
info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n",
- slaveid,ip,port,state);
+ slaveid,ip,slave->slave_listening_port,state);
slaveid++;
}
}
}
if (server.aof_state != REDIS_AOF_OFF) {
mem_used -= sdslen(server.aof_buf);
- mem_used -= sdslen(server.aof_rewrite_buf);
+ mem_used -= aofRewriteBufferSize();
}
/* Check if we are over the memory limit. */
void memtest(size_t megabytes, int passes);
+/* Out-of-memory callback: logs, at REDIS_WARNING level, the size of the
+ * allocation request passed in 'allocation_size' and then calls
+ * redisPanic("OOM"). main() registers it via zmalloc_set_oom_handler(). */
+void redisOutOfMemoryHandler(size_t allocation_size) {
+    redisLog(REDIS_WARNING,
+        "Out Of Memory allocating %zu bytes!", allocation_size);
+    redisPanic("OOM");
+}
+
int main(int argc, char **argv) {
long long start;
struct timeval tv;
/* We need to initialize our libraries, and the server configuration. */
zmalloc_enable_thread_safeness();
+ zmalloc_set_oom_handler(redisOutOfMemoryHandler);
srand(time(NULL)^getpid());
gettimeofday(&tv,NULL);
dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());