X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/7d47ecd543ed41db70396bc3866eac872a31774d..ac06fc011df598372232a5dc1805683004240c0d:/src/redis.c?ds=sidebyside diff --git a/src/redis.c b/src/redis.c index 99ff1898..8a5f9632 100644 --- a/src/redis.c +++ b/src/redis.c @@ -89,6 +89,7 @@ struct redisCommand readonlyCommandTable[] = { {"rpop",rpopCommand,2,0,NULL,1,1,1}, {"lpop",lpopCommand,2,0,NULL,1,1,1}, {"brpop",brpopCommand,-3,0,NULL,1,1,1}, + {"brpoplpush",brpoplpushCommand,4,REDIS_CMD_DENYOOM,NULL,1,2,1}, {"blpop",blpopCommand,-3,0,NULL,1,1,1}, {"llen",llenCommand,2,0,NULL,1,1,1}, {"lindex",lindexCommand,3,0,NULL,1,1,1}, @@ -96,7 +97,7 @@ struct redisCommand readonlyCommandTable[] = { {"lrange",lrangeCommand,4,0,NULL,1,1,1}, {"ltrim",ltrimCommand,4,0,NULL,1,1,1}, {"lrem",lremCommand,4,0,NULL,1,1,1}, - {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_DENYOOM,NULL,1,2,1}, + {"rpoplpush",rpoplpushCommand,3,REDIS_CMD_DENYOOM,NULL,1,2,1}, {"sadd",saddCommand,3,REDIS_CMD_DENYOOM,NULL,1,1,1}, {"srem",sremCommand,3,0,NULL,1,1,1}, {"smove",smoveCommand,4,0,NULL,1,2,1}, @@ -124,7 +125,7 @@ struct redisCommand readonlyCommandTable[] = { {"zcount",zcountCommand,4,0,NULL,1,1,1}, {"zrevrange",zrevrangeCommand,-4,0,NULL,1,1,1}, {"zcard",zcardCommand,2,0,NULL,1,1,1}, - {"zscore",zscoreCommand,3,REDIS_CMD_DENYOOM,NULL,1,1,1}, + {"zscore",zscoreCommand,3,0,NULL,1,1,1}, {"zrank",zrankCommand,3,0,NULL,1,1,1}, {"zrevrank",zrevrankCommand,3,0,NULL,1,1,1}, {"hset",hsetCommand,4,REDIS_CMD_DENYOOM,NULL,1,1,1}, @@ -252,6 +253,15 @@ int dictSdsKeyCompare(void *privdata, const void *key1, return memcmp(key1, key2, l1) == 0; } +/* A case insensitive version used for the command lookup table. */ +int dictSdsKeyCaseCompare(void *privdata, const void *key1, + const void *key2) +{ + DICT_NOTUSED(privdata); + + return strcasecmp(key1, key2) == 0; +} + void dictRedisObjectDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); @@ -283,6 +293,10 @@ unsigned int dictSdsHash(const void *key) { return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); } +unsigned int dictSdsCaseHash(const void *key) { + return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key)); +} + int dictEncObjKeyCompare(void *privdata, const void *key1, const void *key2) { @@ -364,6 +378,16 @@ dictType keyptrDictType = { NULL /* val destructor */ }; +/* Command table. sds string -> command struct pointer. */ +dictType commandTableDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL /* val destructor */ +}; + /* Hash type hash table (note that small hashes are represented with zimpaps) */ dictType hashDictType = { dictEncObjHash, /* hash function */ @@ -549,7 +573,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { } /* Close connections of timedout clients */ - if ((server.maxidletime && !(loops % 100)) || server.blpop_blocked_clients) + if ((server.maxidletime && !(loops % 100)) || server.bpop_blocked_clients) closeTimedoutClients(); /* Check if a background saving or AOF rewrite in progress terminated */ @@ -593,10 +617,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { while (server.vm_enabled && zmalloc_used_memory() > server.vm_max_memory) { - int retval; - - if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue; - retval = (server.vm_max_threads == 0) ? + int retval = (server.vm_max_threads == 0) ? vmSwapOneObjectBlocking() : vmSwapOneObjectThreaded(); if (retval == REDIS_ERR && !(loops % 300) && @@ -613,17 +634,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { } } - /* Check if we should connect to a MASTER */ - if (server.replstate == REDIS_REPL_CONNECT && !(loops % 10)) { - redisLog(REDIS_NOTICE,"Connecting to MASTER..."); - if (syncWithMaster() == REDIS_OK) { - redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded"); - if (server.appendonly) rewriteAppendOnlyFileBackground(); - } - } + /* Replication cron function -- used to reconnect to master and + * to detect transfer failures. */ + if (!(loops % 10)) replicationCron(); - /* Update fragmentation info, used for fast RSS estimation */ - if (!(loops % 10)) server.fragmentation = zmalloc_get_fragmentation_ratio(); return 100; } @@ -689,6 +703,8 @@ void createSharedObjects(void) { "-ERR source and destination objects are the same\r\n")); shared.outofrangeerr = createObject(REDIS_STRING,sdsnew( "-ERR index out of range\r\n")); + shared.loadingerr = createObject(REDIS_STRING,sdsnew( + "-LOADING Redis is loading the dataset in memory\r\n")); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); @@ -717,13 +733,17 @@ void createSharedObjects(void) { } void initServerConfig() { - server.dbnum = REDIS_DEFAULT_DBNUM; server.port = REDIS_SERVERPORT; + server.bindaddr = NULL; + server.unixsocket = NULL; + server.ipfd = -1; + server.sofd = -1; + server.dbnum = REDIS_DEFAULT_DBNUM; server.verbosity = REDIS_VERBOSE; server.maxidletime = REDIS_MAXIDLETIME; server.saveparams = NULL; + server.loading = 0; server.logfile = NULL; /* NULL = log on standard output */ - server.bindaddr = NULL; server.glueoutputbuf = 1; server.daemonize = 0; server.appendonly = 0; @@ -738,9 +758,8 @@ void initServerConfig() { server.requirepass = NULL; server.rdbcompression = 1; server.activerehashing = 1; - server.fragmentation = 1; server.maxclients = 0; - server.blpop_blocked_clients = 0; + server.bpop_blocked_clients = 0; server.maxmemory = 0; server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; server.maxmemory_samples = 3; @@ -771,12 +790,21 @@ void initServerConfig() { server.masterport = 6379; server.master = NULL; server.replstate = REDIS_REPL_NONE; + server.repl_serve_stale_data = 1; /* Double constants initialization */ R_Zero = 0.0; R_PosInf = 1.0/R_Zero; R_NegInf = -1.0/R_Zero; R_Nan = R_Zero/R_Zero; + + /* Command table -- we intiialize it here as it is part of the + * initial configuration, since command names may be changed via + * redis.conf using the rename-command directive. */ + server.commands = dictCreate(&commandTableDictType,NULL); + populateCommandTable(); + server.delCommand = lookupCommandByCString("del"); + server.multiCommand = lookupCommandByCString("multi"); } void initServer() { @@ -787,21 +815,27 @@ void initServer() { setupSigSegvAction(); server.mainthread = pthread_self(); - server.devnull = fopen("/dev/null","w"); - if (server.devnull == NULL) { - redisLog(REDIS_WARNING, "Can't open /dev/null: %s", server.neterr); - exit(1); - } server.clients = listCreate(); server.slaves = listCreate(); server.monitors = listCreate(); - server.objfreelist = listCreate(); createSharedObjects(); server.el = aeCreateEventLoop(); server.db = zmalloc(sizeof(redisDb)*server.dbnum); - server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr); - if (server.fd == -1) { - redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr); + server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr); + if (server.ipfd == ANET_ERR) { + redisLog(REDIS_WARNING, "Opening port: %s", server.neterr); + exit(1); + } + if (server.unixsocket != NULL) { + unlink(server.unixsocket); /* don't care if this fails */ + server.sofd = anetUnixServer(server.neterr,server.unixsocket); + if (server.sofd == ANET_ERR) { + redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr); + exit(1); + } + } + if (server.ipfd < 0 && server.sofd < 0) { + redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting."); exit(1); } for (j = 0; j < server.dbnum; j++) { @@ -832,8 +866,10 @@ void initServer() { server.stat_keyspace_hits = 0; server.unixtime = time(NULL); aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); - if (aeCreateFileEvent(server.el, server.fd, AE_READABLE, - acceptHandler, NULL) == AE_ERR) oom("creating file event"); + if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE, + acceptTcpHandler,NULL) == AE_ERR) oom("creating file event"); + if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE, + acceptUnixHandler,NULL) == AE_ERR) oom("creating file event"); if (server.appendonly) { server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644); @@ -847,31 +883,34 @@ void initServer() { if (server.vm_enabled) vmInit(); } -int qsortRedisCommands(const void *r1, const void *r2) { - return strcasecmp( - ((struct redisCommand*)r1)->name, - ((struct redisCommand*)r2)->name); -} +/* Populates the Redis Command Table starting from the hard coded list + * we have on top of redis.c file. */ +void populateCommandTable(void) { + int j; + int numcommands = sizeof(readonlyCommandTable)/sizeof(struct redisCommand); + + for (j = 0; j < numcommands; j++) { + struct redisCommand *c = readonlyCommandTable+j; + int retval; -void sortCommandTable() { - /* Copy and sort the read-only version of the command table */ - commandTable = (struct redisCommand*)zmalloc(sizeof(readonlyCommandTable)); - memcpy(commandTable,readonlyCommandTable,sizeof(readonlyCommandTable)); - qsort(commandTable, - sizeof(readonlyCommandTable)/sizeof(struct redisCommand), - sizeof(struct redisCommand),qsortRedisCommands); + retval = dictAdd(server.commands, sdsnew(c->name), c); + assert(retval == DICT_OK); + } } /* ====================== Commands lookup and execution ===================== */ -struct redisCommand *lookupCommand(char *name) { - struct redisCommand tmp = {name,NULL,0,0,NULL,0,0,0}; - return bsearch( - &tmp, - commandTable, - sizeof(readonlyCommandTable)/sizeof(struct redisCommand), - sizeof(struct redisCommand), - qsortRedisCommands); +struct redisCommand *lookupCommand(sds name) { + return dictFetchValue(server.commands, name); +} + +struct redisCommand *lookupCommandByCString(char *s) { + struct redisCommand *cmd; + sds name = sdsnew(s); + + cmd = dictFetchValue(server.commands, name); + sdsfree(name); + return cmd; } /* Call() is the core of Redis execution of a command */ @@ -955,6 +994,23 @@ int processCommand(redisClient *c) { return REDIS_OK; } + /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and + * we are a slave with a broken link with master. */ + if (server.masterhost && server.replstate != REDIS_REPL_CONNECTED && + server.repl_serve_stale_data == 0 && + cmd->proc != infoCommand && cmd->proc != slaveofCommand) + { + addReplyError(c, + "link with MASTER is down and slave-serve-stale-data is set to no"); + return REDIS_OK; + } + + /* Loading DB? Return an error if the command is not INFO */ + if (server.loading && cmd->proc != infoCommand) { + addReply(c, shared.loadingerr); + return REDIS_OK; + } + /* Exec the command */ if (c->flags & REDIS_MULTI && cmd->proc != execCommand && cmd->proc != discardCommand && @@ -1080,9 +1136,10 @@ sds genRedisInfoString(void) { "used_memory:%zu\r\n" "used_memory_human:%s\r\n" "used_memory_rss:%zu\r\n" - "used_memory_estimated_rss:%zu\r\n" "mem_fragmentation_ratio:%.2f\r\n" "use_tcmalloc:%d\r\n" + "loading:%d\r\n" + "aof_enabled:%d\r\n" "changes_since_last_save:%lld\r\n" "bgsave_in_progress:%d\r\n" "last_save_time:%ld\r\n" @@ -1113,17 +1170,18 @@ sds genRedisInfoString(void) { (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000, listLength(server.clients)-listLength(server.slaves), listLength(server.slaves), - server.blpop_blocked_clients, + server.bpop_blocked_clients, zmalloc_used_memory(), hmem, zmalloc_get_rss(), - redisEstimateRSS(), zmalloc_get_fragmentation_ratio(), #ifdef USE_TCMALLOC 1, #else 0, #endif + server.loading, + server.appendonly, server.dirty, server.bgsavechildpid != -1, server.lastsave, @@ -1146,12 +1204,23 @@ sds genRedisInfoString(void) { "master_port:%d\r\n" "master_link_status:%s\r\n" "master_last_io_seconds_ago:%d\r\n" + "master_sync_in_progress:%d\r\n" ,server.masterhost, server.masterport, (server.replstate == REDIS_REPL_CONNECTED) ? "up" : "down", - server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1 + server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1, + server.replstate == REDIS_REPL_TRANSFER ); + + if (server.replstate == REDIS_REPL_TRANSFER) { + info = sdscatprintf(info, + "master_sync_left_bytes:%ld\r\n" + "master_sync_last_io_seconds_ago:%d\r\n" + ,(long)server.repl_transfer_left, + (int)(time(NULL)-server.repl_transfer_lastio) + ); + } } if (server.vm_enabled) { lockThreadedIO(); @@ -1183,6 +1252,35 @@ sds genRedisInfoString(void) { ); unlockThreadedIO(); } + if (server.loading) { + double perc; + time_t eta, elapsed; + off_t remaining_bytes = server.loading_total_bytes- + server.loading_loaded_bytes; + + perc = ((double)server.loading_loaded_bytes / + server.loading_total_bytes) * 100; + + elapsed = time(NULL)-server.loading_start_time; + if (elapsed == 0) { + eta = 1; /* A fake 1 second figure if we don't have enough info */ + } else { + eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes; + } + + info = sdscatprintf(info, + "loading_start_time:%ld\r\n" + "loading_total_bytes:%llu\r\n" + "loading_loaded_bytes:%llu\r\n" + "loading_loaded_perc:%.2f\r\n" + "loading_eta_seconds:%ld\r\n" + ,(unsigned long) server.loading_start_time, + (unsigned long long) server.loading_total_bytes, + (unsigned long long) server.loading_loaded_bytes, + perc, + eta + ); + } for (j = 0; j < server.dbnum; j++) { long long keys, vkeys; @@ -1216,61 +1314,6 @@ void monitorCommand(redisClient *c) { /* ============================ Maxmemory directive ======================== */ -/* Try to free one object form the pre-allocated objects free list. - * This is useful under low mem conditions as by default we take 1 million - * free objects allocated. On success REDIS_OK is returned, otherwise - * REDIS_ERR. */ -int tryFreeOneObjectFromFreelist(void) { - robj *o; - - if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex); - if (listLength(server.objfreelist)) { - listNode *head = listFirst(server.objfreelist); - o = listNodeValue(head); - listDelNode(server.objfreelist,head); - if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex); - zfree(o); - return REDIS_OK; - } else { - if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex); - return REDIS_ERR; - } -} - -/* A fast RSS sampling function. - * - * The function is reasonably accurate while fast, since it uses the trick of - * using the server.fragmentation ratio that is computed every second and - * is the ratio between the RSS and our zmalloc() count of allocated bytes. - * - * So in order to compute the current RSS used we just need to multiply - * the zmalloc() memory reporting, that is as fast as reading a counter, - * for the latest estimation of fragmentation. - * - * The behavior of this function is also very desirable because it is - * very responsive to memory changes: while the real RSS is actually measured - * in pages, the RSS estimation will actually change even if just a few bytes - * are freed, and this is a good property when the function is used in order - * to evict keys for Virtual Memory of for 'maxmemory' directive. - * - * Note that when the memory reported by zmalloc is smaller than the RSS - * (that is, fragmentation < 1) this means that something is odd (many pages - * swapped since the Redis instance is idle) and we consider the fragmentation - * ratio 1. */ -size_t redisEstimateRSS(void) { - size_t used = zmalloc_used_memory(); - float maxfrag; - - if (server.fragmentation < 1) return used; - maxfrag = (float)SIZE_MAX / used; - - /* If multiplying memory usage reported by zmalloc per fragmentation - * ratio will lead to an overflow we just return SIZE_MAX. */ - if (maxfrag < server.fragmentation) return SIZE_MAX; - - return (size_t)((used * server.fragmentation)); -} - /* This function gets called when 'maxmemory' is set on the config file to limit * the max memory used by the server, and we are out of memory. * This function will try to, in order: @@ -1285,12 +1328,11 @@ size_t redisEstimateRSS(void) { void freeMemoryIfNeeded(void) { /* Remove keys accordingly to the active policy as long as we are * over the memory limit. */ + if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION) return; + while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) { int j, k, freed = 0; - /* Basic strategy -- remove objects from the free list. */ - if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue; - for (j = 0; j < server.dbnum; j++) { long bestval = 0; /* just to prevent warning */ sds bestkey = NULL; @@ -1326,6 +1368,10 @@ void freeMemoryIfNeeded(void) { de = dictGetRandomKey(dict); thiskey = dictGetEntryKey(de); + /* When policy is volatile-lru we need an additonal lookup + * to locate the real key, as dict is set to db->expires. */ + if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU) + de = dictFind(db->dict, thiskey); o = dictGetEntryVal(de); thisval = estimateObjectIdleTime(o); @@ -1466,7 +1512,6 @@ int main(int argc, char **argv) { time_t start; initServerConfig(); - sortCommandTable(); if (argc == 2) { if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) version(); @@ -1493,7 +1538,10 @@ int main(int argc, char **argv) { if (rdbLoad(server.dbfilename) == REDIS_OK) redisLog(REDIS_NOTICE,"DB loaded from disk: %ld seconds",time(NULL)-start); } - redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port); + if (server.ipfd > 0) + redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port); + if (server.sofd > 0) + redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket); aeSetBeforeSleepProc(server.el,beforeSleep); aeMain(server.el); aeDeleteEventLoop(server.el);