# maxclients 10000
# Don't use more memory than the specified amount of bytes.
-# When the memory limit is reached Redis will try to remove keys with an
-# EXPIRE set. It will try to start freeing keys that are going to expire
-# in little time and preserve keys with a longer time to live.
-# Redis will also try to remove objects from free lists if possible.
-#
-# If all this fails, Redis will start to reply with errors to commands
-# that will use more memory, like SET, LPUSH, and so on, and will continue
-# to reply to most read-only commands like GET.
-#
-# WARNING: maxmemory can be a good idea mainly if you want to use Redis as a
-# 'state' server or cache, not as a real DB. When Redis is used as a real
-# database the memory usage will grow over the weeks, it will be obvious if
-# it is going to use too much memory in the long run, and you'll have the time
-# to upgrade. With maxmemory after the limit is reached you'll start to get
-# errors for write operations, and this may even lead to DB inconsistency.
+# When the memory limit is reached Redis will try to remove keys
+# accordingly to the eviction policy selected (see maxmemmory-policy).
+#
+# If Redis can't remove keys according to the policy, or if the policy is
+# set to 'noeviction', Redis will start to reply with errors to commands
+# that would use more memory, like SET, LPUSH, and so on, and will continue
+# to reply to read-only commands like GET.
+#
+# This option is usually useful when using Redis as an LRU cache, or to set
+# an hard memory limit for an instance (using the 'noeviction' policy).
+#
+# WARNING: If you have slaves attached to an instance with maxmemory on,
+# the size of the output buffers needed to feed the slaves are subtracted
+# from the used memory count, so that network problems / resyncs will
+# not trigger a loop where keys are evicted, and in turn the output
+# buffer of slaves is full with DELs of keys evicted triggering the deletion
+# of more keys, and so forth until the database is completely emptied.
+#
+# In short... if you have slaves attached it is suggested that you set a lower
+# limit for maxmemory so that there is some free RAM on the system for slave
+# output buffers (but this is not needed if the policy is 'noeviction').
#
# maxmemory <bytes>
void propagateExpire(redisDb *db, robj *key) {
robj *argv[2];
- argv[0] = createStringObject("DEL",3);
+ argv[0] = shared.del;
argv[1] = key;
- incrRefCount(key);
+ incrRefCount(argv[0]);
+ incrRefCount(argv[1]);
if (server.aof_state != REDIS_AOF_OFF)
feedAppendOnlyFile(server.delCommand,db->id,argv,2);
static void setProtocolError(redisClient *c, int pos);
+/* To evaluate the output buffer size of a client we need to get size of
+ * allocated objects, however we can't used zmalloc_size() directly on sds
+ * strings because of the trick they use to work (the header is before the
+ * returned pointer), so we use this helper function. */
+size_t zmalloc_size_sds(sds s) {
+ return zmalloc_size(s-sizeof(struct sdshdr));
+}
+
void *dupClientReplyValue(void *o) {
incrRefCount((robj*)o);
return o;
if (listLength(c->reply) == 0) {
incrRefCount(o);
listAddNodeTail(c->reply,o);
+ c->reply_bytes += zmalloc_size_sds(o->ptr);
} else {
tail = listNodeValue(listLast(c->reply));
if (tail->ptr != NULL &&
sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES)
{
+ c->reply_bytes -= zmalloc_size_sds(tail->ptr);
tail = dupLastObjectIfNeeded(c->reply);
tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr));
+ c->reply_bytes += zmalloc_size_sds(tail->ptr);
} else {
incrRefCount(o);
listAddNodeTail(c->reply,o);
+ c->reply_bytes += zmalloc_size_sds(o->ptr);
}
}
- c->reply_bytes += sdslen(o->ptr);
asyncCloseClientOnOutputBufferLimitReached(c);
}
return;
}
- c->reply_bytes += sdslen(s);
if (listLength(c->reply) == 0) {
listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
+ c->reply_bytes += zmalloc_size_sds(s);
} else {
tail = listNodeValue(listLast(c->reply));
if (tail->ptr != NULL &&
sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES)
{
+ c->reply_bytes -= zmalloc_size_sds(tail->ptr);
tail = dupLastObjectIfNeeded(c->reply);
tail->ptr = sdscatlen(tail->ptr,s,sdslen(s));
+ c->reply_bytes += zmalloc_size_sds(tail->ptr);
sdsfree(s);
} else {
listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
+ c->reply_bytes += zmalloc_size_sds(s);
}
}
asyncCloseClientOnOutputBufferLimitReached(c);
if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
if (listLength(c->reply) == 0) {
- listAddNodeTail(c->reply,createStringObject(s,len));
+ robj *o = createStringObject(s,len);
+
+ listAddNodeTail(c->reply,o);
+ c->reply_bytes += zmalloc_size_sds(o->ptr);
} else {
tail = listNodeValue(listLast(c->reply));
if (tail->ptr != NULL &&
sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
{
+ c->reply_bytes -= zmalloc_size_sds(tail->ptr);
tail = dupLastObjectIfNeeded(c->reply);
tail->ptr = sdscatlen(tail->ptr,s,len);
+ c->reply_bytes += zmalloc_size_sds(tail->ptr);
} else {
- listAddNodeTail(c->reply,createStringObject(s,len));
+ robj *o = createStringObject(s,len);
+
+ listAddNodeTail(c->reply,o);
+ c->reply_bytes += zmalloc_size_sds(o->ptr);
}
}
- c->reply_bytes += len;
asyncCloseClientOnOutputBufferLimitReached(c);
}
len = listNodeValue(ln);
len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length);
- c->reply_bytes += sdslen(len->ptr);
+ c->reply_bytes += zmalloc_size_sds(len->ptr);
if (ln->next != NULL) {
next = listNodeValue(ln->next);
void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
char buf[128];
int len;
+
+ /* Things like $3\r\n or *2\r\n are emitted very often by the protocol
+ * so we have a few shared objects to use if the integer is small
+ * like it is most of the times. */
+ if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ addReply(c,shared.mbulkhdr[ll]);
+ return;
+ } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ addReply(c,shared.bulkhdr[ll]);
+ return;
+ }
+
buf[0] = prefix;
len = ll2string(buf+1,sizeof(buf)-1,ll);
buf[len+1] = '\r';
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
redisClient *c = privdata;
int nwritten = 0, totwritten = 0, objlen;
+ size_t objmem;
robj *o;
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
} else {
o = listNodeValue(listFirst(c->reply));
objlen = sdslen(o->ptr);
+ objmem = zmalloc_size_sds(o->ptr);
if (objlen == 0) {
listDelNode(c->reply,listFirst(c->reply));
if (c->sentlen == objlen) {
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
- c->reply_bytes -= objlen;
+ c->reply_bytes -= objmem;
}
}
- /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
+ /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
* bytes, in a single threaded server it's a good idea to serve
* other clients as well, even if a very large request comes from
* super fast link that is always able to accept data (in real world
- * scenario think about 'KEYS *' against the loopback interfae) */
- if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
+ * scenario think about 'KEYS *' against the loopback interface).
+ *
+ * However if we are over the maxmemory limit we ignore that and
+ * just deliver as much data as it is possible to deliver. */
+ if (totwritten > REDIS_MAX_WRITE_PER_EVENT &&
+ (server.maxmemory == 0 ||
+ zmalloc_used_memory() < server.maxmemory)) break;
}
if (nwritten == -1) {
if (errno == EAGAIN) {
* the caller wishes. The main usage of this function currently is
* enforcing the client output length limits. */
unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
- unsigned long list_item_size = sizeof(listNode);
+ unsigned long list_item_size = sizeof(listNode)+sizeof(robj);
return c->reply_bytes + (list_item_size*listLength(c->reply));
}
sdsfree(client);
}
}
+
+/* Helper function used by freeMemoryIfNeeded() in order to flush slaves
+ * output buffers without returning control to the event loop. */
+void flushSlavesOutputBuffers(void) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ redisClient *slave = listNodeValue(ln);
+ int events;
+
+ events = aeGetFileEvents(server.el,slave->fd);
+ if (events & AE_WRITABLE &&
+ slave->replstate == REDIS_REPL_ONLINE &&
+ listLength(slave->reply))
+ {
+ sendReplyToClient(server.el,slave->fd,slave,0);
+ }
+ }
+}
listAddNodeTail(clients,c);
}
/* Notify the client */
- addReply(c,shared.mbulk3);
+ addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.subscribebulk);
addReplyBulk(c,channel);
addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
}
/* Notify the client */
if (notify) {
- addReply(c,shared.mbulk3);
+ addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.unsubscribebulk);
addReplyBulk(c,channel);
addReplyLongLong(c,dictSize(c->pubsub_channels)+
listAddNodeTail(server.pubsub_patterns,pat);
}
/* Notify the client */
- addReply(c,shared.mbulk3);
+ addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.psubscribebulk);
addReplyBulk(c,pattern);
addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
}
/* Notify the client */
if (notify) {
- addReply(c,shared.mbulk3);
+ addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.punsubscribebulk);
addReplyBulk(c,pattern);
addReplyLongLong(c,dictSize(c->pubsub_channels)+
while ((ln = listNext(&li)) != NULL) {
redisClient *c = ln->value;
- addReply(c,shared.mbulk3);
+ addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.messagebulk);
addReplyBulk(c,channel);
addReplyBulk(c,message);
sdslen(pat->pattern->ptr),
(char*)channel->ptr,
sdslen(channel->ptr),0)) {
- addReply(pat->client,shared.mbulk4);
+ addReply(pat->client,shared.mbulkhdr[4]);
addReply(pat->client,shared.pmessagebulk);
addReplyBulk(pat->client,pat->pattern);
addReplyBulk(pat->client,channel);
shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
- shared.mbulk3 = createStringObject("*3\r\n",4);
- shared.mbulk4 = createStringObject("*4\r\n",4);
+ shared.del = createStringObject("DEL",3);
for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
shared.integers[j]->encoding = REDIS_ENCODING_INT;
}
+ for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
+ shared.mbulkhdr[j] = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(),"*%d\r\n",j));
+ shared.bulkhdr[j] = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(),"$%d\r\n",j));
+ }
}
void initServerConfig() {
* First we try to free some memory if possible (if there are volatile
* keys in the dataset). If there are not the only thing we can do
* is returning an error. */
- if (server.maxmemory) freeMemoryIfNeeded();
- if (server.maxmemory && (c->cmd->flags & REDIS_CMD_DENYOOM) &&
- zmalloc_used_memory() > server.maxmemory)
- {
- addReplyError(c,"command not allowed when used memory > 'maxmemory'");
- return REDIS_OK;
+ if (server.maxmemory) {
+ int retval = freeMemoryIfNeeded();
+ if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
+ addReplyError(c,
+ "command not allowed when used memory > 'maxmemory'");
+ return REDIS_OK;
+ }
}
/* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
/* ============================ Maxmemory directive ======================== */
/* This function gets called when 'maxmemory' is set on the config file to limit
- * the max memory used by the server, and we are out of memory.
- * This function will try to, in order:
+ * the max memory used by the server, before processing a command.
+ *
+ * The goal of the function is to free enough memory to keep Redis under the
+ * configured memory limit.
*
- * - Free objects from the free list
- * - Try to remove keys with an EXPIRE set
+ * The function starts calculating how many bytes should be freed to keep
+ * Redis under the limit, and enters a loop selecting the best keys to
+ * evict accordingly to the configured policy.
*
- * It is not possible to free enough memory to reach used-memory < maxmemory
- * the server will start refusing commands that will enlarge even more the
- * memory usage.
+ * If all the bytes needed to return back under the limit were freed the
+ * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
+ * should block the execution of commands that will result in more memory
+ * used by the server.
*/
-void freeMemoryIfNeeded(void) {
- /* Remove keys accordingly to the active policy as long as we are
- * over the memory limit. */
- if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION) return;
+int freeMemoryIfNeeded(void) {
+ size_t mem_used, mem_tofree, mem_freed;
+ int slaves = listLength(server.slaves);
+
+ /* Remove the size of slaves output buffers and AOF buffer from the
+ * count of used memory. */
+ mem_used = zmalloc_used_memory();
+ if (slaves) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ redisClient *slave = listNodeValue(ln);
+ unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
+ if (obuf_bytes > mem_used)
+ mem_used = 0;
+ else
+ mem_used -= obuf_bytes;
+ }
+ }
+ if (server.aof_state != REDIS_AOF_OFF) {
+ mem_used -= sdslen(server.aof_buf);
+ mem_used -= sdslen(server.aof_rewrite_buf);
+ }
+
+ /* Check if we are over the memory limit. */
+ if (mem_used <= server.maxmemory) return REDIS_OK;
+
+ if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
+ return REDIS_ERR; /* We need to free memory, but policy forbids. */
- while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
- int j, k, freed = 0;
+ /* Compute how much memory we need to free. */
+ mem_tofree = mem_used - server.maxmemory;
+ mem_freed = 0;
+ while (mem_freed < mem_tofree) {
+ int j, k, keys_freed = 0;
for (j = 0; j < server.dbnum; j++) {
long bestval = 0; /* just to prevent warning */
/* Finally remove the selected key. */
if (bestkey) {
+ long long delta;
+
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
propagateExpire(db,keyobj);
+ /* We compute the amount of memory freed by dbDelete() alone.
+ * It is possible that actually the memory needed to propagate
+ * the DEL in AOF and replication link is greater than the one
+ * we are freeing removing the key, but we can't account for
+ * that otherwise we would never exit the loop.
+ *
+ * AOF and Output buffer memory will be freed eventually so
+ * we only care about memory used by the key space. */
+ delta = (long long) zmalloc_used_memory();
dbDelete(db,keyobj);
+ delta -= (long long) zmalloc_used_memory();
+ mem_freed += delta;
server.stat_evictedkeys++;
decrRefCount(keyobj);
- freed++;
+ keys_freed++;
+
+ /* When the memory to free starts to be big enough, we may
+ * start spending so much time here that is impossible to
+ * deliver data to the slaves fast enough, so we force the
+ * transmission here inside the loop. */
+ if (slaves) flushSlavesOutputBuffers();
}
}
- if (!freed) return; /* nothing to free... */
+ if (!keys_freed) return REDIS_ERR; /* nothing to free... */
}
+ return REDIS_OK;
}
/* =================================== Main! ================================ */
#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
#define REDIS_SHARED_INTEGERS 10000
+#define REDIS_SHARED_BULKHDR_LEN 32
#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
#define REDIS_AOF_REWRITE_PERC 100
#define REDIS_AOF_REWRITE_MIN_SIZE (1024*1024)
*outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus,
*select0, *select1, *select2, *select3, *select4,
*select5, *select6, *select7, *select8, *select9,
- *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
- *mbulk4, *psubscribebulk, *punsubscribebulk,
- *integers[REDIS_SHARED_INTEGERS];
+ *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk,
+ *psubscribebulk, *punsubscribebulk, *del,
+ *integers[REDIS_SHARED_INTEGERS],
+ *mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
+ *bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
};
/* ZSETs use a specialized version of Skiplists */
void asyncCloseClientOnOutputBufferLimitReached(redisClient *c);
int getClientLimitClassByName(char *name);
char *getClientLimitClassName(int class);
+void flushSlavesOutputBuffers(void);
#ifdef __GNUC__
void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
void zsetConvert(robj *zobj, int encoding);
/* Core functions */
-void freeMemoryIfNeeded(void);
+int freeMemoryIfNeeded(void);
int processCommand(redisClient *c);
void setupSignalHandlers(void);
struct redisCommand *lookupCommand(sds name);
#endif
}
+/* Provide zmalloc_size() for systems where this function is not provided by
+ * malloc itself, given that in that case we store an header with this
+ * information as the first bytes of every allocation. */
+#ifndef HAVE_MALLOC_SIZE
+size_t zmalloc_size(void *ptr) {
+ void *realptr = (char*)ptr-PREFIX_SIZE;
+ size_t size = *((size_t*)realptr);
+ /* Assume at least that all the allocations are padded at sizeof(long) by
+ * the underlying allocator. */
+ if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1));
+ return size+PREFIX_SIZE;
+}
+#endif
+
void zfree(void *ptr) {
#ifndef HAVE_MALLOC_SIZE
void *realptr;
float zmalloc_get_fragmentation_ratio(void);
size_t zmalloc_get_rss(void);
+#ifndef HAVE_MALLOC_SIZE
+size_t zmalloc_size(void *ptr);
+#endif
+
#endif /* __ZMALLOC_H */