X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/36c19d03e08b94ea1bc246918cbd71ea810d38aa..4b1f6ad3e7a5c7c28618e43e7539c9a937bf8521:/src/networking.c?ds=sidebyside diff --git a/src/networking.c b/src/networking.c index dd005335..3bc084f7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1,6 +1,16 @@ #include "redis.h" #include +static void setProtocolError(redisClient *c, int pos); + +/* To evaluate the output buffer size of a client we need to get size of + * allocated objects, however we can't used zmalloc_size() directly on sds + * strings because of the trick they use to work (the header is before the + * returned pointer), so we use this helper function. */ +size_t zmalloc_size_sds(sds s) { + return zmalloc_size(s-sizeof(struct sdshdr)); +} + void *dupClientReplyValue(void *o) { incrRefCount((robj*)o); return o; @@ -11,35 +21,50 @@ int listMatchObjects(void *a, void *b) { } redisClient *createClient(int fd) { - redisClient *c; - - /* Allocate more space to hold a static write buffer. */ - c = zmalloc(sizeof(redisClient)+REDIS_REPLY_CHUNK_BYTES); - c->buflen = REDIS_REPLY_CHUNK_BYTES; - c->bufpos = 0; + redisClient *c = zmalloc(sizeof(redisClient)); + + /* passing -1 as fd it is possible to create a non connected client. + * This is useful since all the Redis commands needs to be executed + * in the context of a client. When commands are executed in other + * contexts (for instance a Lua script) we need a non connected client. */ + if (fd != -1) { + anetNonBlock(NULL,fd); + anetTcpNoDelay(NULL,fd); + if (aeCreateFileEvent(server.el,fd,AE_READABLE, + readQueryFromClient, c) == AE_ERR) + { + close(fd); + zfree(c); + return NULL; + } + } - anetNonBlock(NULL,fd); - anetTcpNoDelay(NULL,fd); - if (!c) return NULL; selectDb(c,0); c->fd = fd; + c->bufpos = 0; c->querybuf = sdsempty(); + c->querybuf_peak = 0; + c->reqtype = 0; c->argc = 0; c->argv = NULL; + c->cmd = c->lastcmd = NULL; + c->multibulklen = 0; c->bulklen = -1; - c->multibulk = 0; - c->mbargc = 0; - c->mbargv = NULL; c->sentlen = 0; c->flags = 0; - c->lastinteraction = time(NULL); + c->ctime = c->lastinteraction = server.unixtime; c->authenticated = 0; c->replstate = REDIS_REPL_NONE; + c->slave_listening_port = 0; c->reply = listCreate(); + c->reply_bytes = 0; + c->obuf_soft_limit_reached_time = 0; listSetFreeMethod(c->reply,decrRefCount); listSetDupMethod(c->reply,dupClientReplyValue); - c->blocking_keys = NULL; - c->blocking_keys_num = 0; + c->bpop.keys = NULL; + c->bpop.count = 0; + c->bpop.timeout = 0; + c->bpop.target = NULL; c->io_keys = listCreate(); c->watched_keys = listCreate(); listSetFreeMethod(c->io_keys,decrRefCount); @@ -47,18 +72,28 @@ redisClient *createClient(int fd) { c->pubsub_patterns = listCreate(); listSetFreeMethod(c->pubsub_patterns,decrRefCount); listSetMatchMethod(c->pubsub_patterns,listMatchObjects); - if (aeCreateFileEvent(server.el, c->fd, AE_READABLE, - readQueryFromClient, c) == AE_ERR) { - freeClient(c); - return NULL; - } - listAddNodeTail(server.clients,c); + if (fd != -1) listAddNodeTail(server.clients,c); initClientMultiState(c); return c; } -int _ensureFileEvent(redisClient *c) { - if (c->fd <= 0) return REDIS_ERR; +/* This function is called every time we are going to transmit new data + * to the client. The behavior is the following: + * + * If the client should receive new data (normal clients will) the function + * returns REDIS_OK, and make sure to install the write handler in our event + * loop so that when the socket is writable new data gets written. + * + * If the client should not receive new data, because it is a fake client + * or a slave, or because the setup of the write handler failed, the function + * returns REDIS_ERR. + * + * Typically gets called every time a reply is built, before adding more + * data to the clients output buffers. If the function returns REDIS_ERR no + * data should be appended to the output buffers. */ +int prepareClientToWrite(redisClient *c) { + if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK; + if (c->fd <= 0) return REDIS_ERR; /* Fake client */ if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || c->replstate == REDIS_REPL_ONLINE) && @@ -83,8 +118,14 @@ robj *dupLastObjectIfNeeded(list *reply) { return listNodeValue(ln); } +/* ----------------------------------------------------------------------------- + * Low level functions to add more data to output buffers. + * -------------------------------------------------------------------------- */ + int _addReplyToBuffer(redisClient *c, char *s, size_t len) { - size_t available = c->buflen-c->bufpos; + size_t available = sizeof(c->buf)-c->bufpos; + + if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK; /* If there already are entries in the reply list, we cannot * add anything more to the static buffer. */ @@ -100,9 +141,13 @@ int _addReplyToBuffer(redisClient *c, char *s, size_t len) { void _addReplyObjectToList(redisClient *c, robj *o) { robj *tail; + + if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; + if (listLength(c->reply) == 0) { incrRefCount(o); listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -110,21 +155,32 @@ void _addReplyObjectToList(redisClient *c, robj *o) { if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr)); + c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { incrRefCount(o); listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } } + asyncCloseClientOnOutputBufferLimitReached(c); } /* This method takes responsibility over the sds. When it is no longer * needed it will be free'd, otherwise it ends up in a robj. */ void _addReplySdsToList(redisClient *c, sds s) { robj *tail; + + if (c->flags & REDIS_CLOSE_AFTER_REPLY) { + sdsfree(s); + return; + } + if (listLength(c->reply) == 0) { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + c->reply_bytes += zmalloc_size_sds(s); } else { tail = listNodeValue(listLast(c->reply)); @@ -132,19 +188,29 @@ void _addReplySdsToList(redisClient *c, sds s) { if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,sdslen(s)); + c->reply_bytes += zmalloc_size_sds(tail->ptr); sdsfree(s); } else { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + c->reply_bytes += zmalloc_size_sds(s); } } + asyncCloseClientOnOutputBufferLimitReached(c); } void _addReplyStringToList(redisClient *c, char *s, size_t len) { robj *tail; + + if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; + if (listLength(c->reply) == 0) { - listAddNodeTail(c->reply,createStringObject(s,len)); + robj *o = createStringObject(s,len); + + listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -152,30 +218,63 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { if (tail->ptr != NULL && sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,len); + c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { - listAddNodeTail(c->reply,createStringObject(s,len)); + robj *o = createStringObject(s,len); + + listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } } + asyncCloseClientOnOutputBufferLimitReached(c); } +/* ----------------------------------------------------------------------------- + * Higher level functions to queue data on the client output buffer. + * The following functions are the ones that commands implementations will call. + * -------------------------------------------------------------------------- */ + void addReply(redisClient *c, robj *obj) { - if (_ensureFileEvent(c) != REDIS_OK) return; - if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) { - /* Returns a new object with refcount 1 */ - obj = dupStringObject(obj); - } else { - /* This increments the refcount. */ + if (prepareClientToWrite(c) != REDIS_OK) return; + + /* This is an important place where we can avoid copy-on-write + * when there is a saving child running, avoiding touching the + * refcount field of the object if it's not needed. + * + * If the encoding is RAW and there is room in the static buffer + * we'll be able to send the object to the client without + * messing with its page. */ + if (obj->encoding == REDIS_ENCODING_RAW) { + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); + } else if (obj->encoding == REDIS_ENCODING_INT) { + /* Optimization: if there is room in the static buffer for 32 bytes + * (more than the max chars a 64 bit integer can take as string) we + * avoid decoding the object and go for the lower level approach. */ + if (listLength(c->reply) == 0 && (sizeof(c->buf) - c->bufpos) >= 32) { + char buf[32]; + int len; + + len = ll2string(buf,sizeof(buf),(long)obj->ptr); + if (_addReplyToBuffer(c,buf,len) == REDIS_OK) + return; + /* else... continue with the normal code path, but should never + * happen actually since we verified there is room. */ + } obj = getDecodedObject(obj); + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); + decrRefCount(obj); + } else { + redisPanic("Wrong obj->encoding in addReply()"); } - if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) - _addReplyObjectToList(c,obj); - decrRefCount(obj); } void addReplySds(redisClient *c, sds s) { - if (_ensureFileEvent(c) != REDIS_OK) { + if (prepareClientToWrite(c) != REDIS_OK) { /* The caller expects the sds to be free'd. */ sdsfree(s); return; @@ -189,15 +288,63 @@ void addReplySds(redisClient *c, sds s) { } void addReplyString(redisClient *c, char *s, size_t len) { - if (_ensureFileEvent(c) != REDIS_OK) return; + if (prepareClientToWrite(c) != REDIS_OK) return; if (_addReplyToBuffer(c,s,len) != REDIS_OK) _addReplyStringToList(c,s,len); } +void addReplyErrorLength(redisClient *c, char *s, size_t len) { + addReplyString(c,"-ERR ",5); + addReplyString(c,s,len); + addReplyString(c,"\r\n",2); +} + +void addReplyError(redisClient *c, char *err) { + addReplyErrorLength(c,err,strlen(err)); +} + +void addReplyErrorFormat(redisClient *c, const char *fmt, ...) { + size_t l, j; + va_list ap; + va_start(ap,fmt); + sds s = sdscatvprintf(sdsempty(),fmt,ap); + va_end(ap); + /* Make sure there are no newlines in the string, otherwise invalid protocol + * is emitted. */ + l = sdslen(s); + for (j = 0; j < l; j++) { + if (s[j] == '\r' || s[j] == '\n') s[j] = ' '; + } + addReplyErrorLength(c,s,sdslen(s)); + sdsfree(s); +} + +void addReplyStatusLength(redisClient *c, char *s, size_t len) { + addReplyString(c,"+",1); + addReplyString(c,s,len); + addReplyString(c,"\r\n",2); +} + +void addReplyStatus(redisClient *c, char *status) { + addReplyStatusLength(c,status,strlen(status)); +} + +void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { + va_list ap; + va_start(ap,fmt); + sds s = sdscatvprintf(sdsempty(),fmt,ap); + va_end(ap); + addReplyStatusLength(c,s,sdslen(s)); + sdsfree(s); +} + /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(redisClient *c) { - if (_ensureFileEvent(c) != REDIS_OK) return NULL; + /* Note that we install the write event here even if the object is not + * ready to be sent, since we are sure that before returning to the + * event loop setDeferredMultiBulkLength() will be called. */ + if (prepareClientToWrite(c) != REDIS_OK) return NULL; listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL)); return listLast(c->reply); } @@ -212,17 +359,23 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { len = listNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); + c->reply_bytes += zmalloc_size_sds(len->ptr); if (ln->next != NULL) { next = listNodeValue(ln->next); - /* Only glue when the next node is an sds */ + /* Only glue when the next node is non-NULL (an sds in this case) */ if (next->ptr != NULL) { - len->ptr = sdscat(len->ptr,next->ptr); + c->reply_bytes -= zmalloc_size_sds(len->ptr); + c->reply_bytes -= zmalloc_size_sds(next->ptr); + len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr)); + c->reply_bytes += zmalloc_size_sds(len->ptr); listDelNode(c->reply,ln->next); } } + asyncCloseClientOnOutputBufferLimitReached(c); } +/* Add a duble as a bulk reply */ void addReplyDouble(redisClient *c, double d) { char dbuf[128], sbuf[128]; int dlen, slen; @@ -231,9 +384,23 @@ void addReplyDouble(redisClient *c, double d) { addReplyString(c,sbuf,slen); } -void _addReplyLongLong(redisClient *c, long long ll, char prefix) { +/* Add a long long as integer reply or bulk len / multi bulk count. + * Basically this is used to output . */ +void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) { char buf[128]; int len; + + /* Things like $3\r\n or *2\r\n are emitted very often by the protocol + * so we have a few shared objects to use if the integer is small + * like it is most of the times. */ + if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) { + addReply(c,shared.mbulkhdr[ll]); + return; + } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) { + addReply(c,shared.bulkhdr[ll]); + return; + } + buf[0] = prefix; len = ll2string(buf+1,sizeof(buf)-1,ll); buf[len+1] = '\r'; @@ -242,13 +409,19 @@ void _addReplyLongLong(redisClient *c, long long ll, char prefix) { } void addReplyLongLong(redisClient *c, long long ll) { - _addReplyLongLong(c,ll,':'); + if (ll == 0) + addReply(c,shared.czero); + else if (ll == 1) + addReply(c,shared.cone); + else + addReplyLongLongWithPrefix(c,ll,':'); } void addReplyMultiBulkLen(redisClient *c, long length) { - _addReplyLongLong(c,length,'*'); + addReplyLongLongWithPrefix(c,length,'*'); } +/* Create the length prefix of a bulk reply, example: $2234 */ void addReplyBulkLen(redisClient *c, robj *obj) { size_t len; @@ -267,76 +440,133 @@ void addReplyBulkLen(redisClient *c, robj *obj) { len++; } } - _addReplyLongLong(c,len,'$'); + addReplyLongLongWithPrefix(c,len,'$'); } +/* Add a Redis Object as a bulk reply */ void addReplyBulk(redisClient *c, robj *obj) { addReplyBulkLen(c,obj); addReply(c,obj); addReply(c,shared.crlf); } -/* In the CONFIG command we need to add vanilla C string as bulk replies */ +/* Add a C buffer as bulk reply */ +void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) { + addReplyLongLongWithPrefix(c,len,'$'); + addReplyString(c,p,len); + addReply(c,shared.crlf); +} + +/* Add a C nul term string as bulk reply */ void addReplyBulkCString(redisClient *c, char *s) { if (s == NULL) { addReply(c,shared.nullbulk); } else { - robj *o = createStringObject(s,strlen(s)); - addReplyBulk(c,o); - decrRefCount(o); + addReplyBulkCBuffer(c,s,strlen(s)); } } -void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { - int cport, cfd; - char cip[128]; - redisClient *c; - REDIS_NOTUSED(el); - REDIS_NOTUSED(mask); - REDIS_NOTUSED(privdata); +/* Add a long long as a bulk reply */ +void addReplyBulkLongLong(redisClient *c, long long ll) { + char buf[64]; + int len; - cfd = anetAccept(server.neterr, fd, cip, &cport); - if (cfd == AE_ERR) { - redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr); - return; - } - redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport); - if ((c = createClient(cfd)) == NULL) { + len = ll2string(buf,64,ll); + addReplyBulkCBuffer(c,buf,len); +} + +/* Copy 'src' client output buffers into 'dst' client output buffers. + * The function takes care of freeing the old output buffers of the + * destination client. */ +void copyClientOutputBuffer(redisClient *dst, redisClient *src) { + listRelease(dst->reply); + dst->reply = listDup(src->reply); + memcpy(dst->buf,src->buf,src->bufpos); + dst->bufpos = src->bufpos; + dst->reply_bytes = src->reply_bytes; +} + +static void acceptCommonHandler(int fd) { + redisClient *c; + if ((c = createClient(fd)) == NULL) { redisLog(REDIS_WARNING,"Error allocating resoures for the client"); - close(cfd); /* May be already closed, just ingore errors */ + close(fd); /* May be already closed, just ingore errors */ return; } /* If maxclient directive is set and this is one client more... close the * connection. Note that we create the client instead to check before * for this condition, since now the socket is already set in nonblocking * mode and we can send an error for free using the Kernel I/O */ - if (server.maxclients && listLength(server.clients) > server.maxclients) { + if (listLength(server.clients) > server.maxclients) { char *err = "-ERR max number of clients reached\r\n"; /* That's a best effort error message, don't check write errors */ if (write(c->fd,err,strlen(err)) == -1) { /* Nothing to do, Just to avoid the warning... */ } + server.stat_rejected_conn++; freeClient(c); return; } server.stat_numconnections++; } +void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + int cport, cfd; + char cip[128]; + REDIS_NOTUSED(el); + REDIS_NOTUSED(mask); + REDIS_NOTUSED(privdata); + + cfd = anetTcpAccept(server.neterr, fd, cip, &cport); + if (cfd == AE_ERR) { + redisLog(REDIS_WARNING,"Accepting client connection: %s", server.neterr); + return; + } + redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport); + acceptCommonHandler(cfd); +} + +void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + int cfd; + REDIS_NOTUSED(el); + REDIS_NOTUSED(mask); + REDIS_NOTUSED(privdata); + + cfd = anetUnixAccept(server.neterr, fd); + if (cfd == AE_ERR) { + redisLog(REDIS_WARNING,"Accepting client connection: %s", server.neterr); + return; + } + redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket); + acceptCommonHandler(cfd); +} + + static void freeClientArgv(redisClient *c) { int j; - for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); - for (j = 0; j < c->mbargc; j++) - decrRefCount(c->mbargv[j]); c->argc = 0; - c->mbargc = 0; + c->cmd = NULL; +} + +/* Close all the slaves connections. This is useful in chained replication + * when we resync with our own master and want to force all our slaves to + * resync with us as well. */ +void disconnectSlaves(void) { + while (listLength(server.slaves)) { + listNode *ln = listFirst(server.slaves); + freeClient((redisClient*)ln->value); + } } void freeClient(redisClient *c) { listNode *ln; + /* If this is marked as current client unset it */ + if (server.current_client == c) server.current_client = NULL; + /* Note that if the client we are freeing is blocked into a blocking * call, we have to set querybuf to NULL *before* to call * unblockClientWaitingData() to avoid processInputBuffer() will get @@ -365,24 +595,12 @@ void freeClient(redisClient *c) { ln = listSearchKey(server.clients,c); redisAssert(ln != NULL); listDelNode(server.clients,ln); - /* Remove from the list of clients waiting for swapped keys, or ready - * to be restarted, but not yet woken up again. */ - if (c->flags & REDIS_IO_WAIT) { - redisAssert(server.vm_enabled); - if (listLength(c->io_keys) == 0) { - ln = listSearchKey(server.io_ready_clients,c); - - /* When this client is waiting to be woken up (REDIS_IO_WAIT), - * it should be present in the list io_ready_clients */ - redisAssert(ln != NULL); - listDelNode(server.io_ready_clients,ln); - } else { - while (listLength(c->io_keys)) { - ln = listFirst(c->io_keys); - dontWaitForSwappedKey(c,ln->value); - } - } - server.vm_blocked_clients--; + /* When client was just unblocked because of a blocking operation, + * remove it from the list with unblocked clients. */ + if (c->flags & REDIS_UNBLOCKED) { + ln = listSearchKey(server.unblocked_clients,c); + redisAssert(ln != NULL); + listDelNode(server.unblocked_clients,ln); } listRelease(c->io_keys); /* Master/slave cleanup. @@ -399,40 +617,59 @@ void freeClient(redisClient *c) { /* Case 2: we lost the connection with the master. */ if (c->flags & REDIS_MASTER) { server.master = NULL; - server.replstate = REDIS_REPL_CONNECT; - /* Since we lost the connection with the master, we should also - * close the connection with all our slaves if we have any, so - * when we'll resync with the master the other slaves will sync again - * with us as well. Note that also when the slave is not connected - * to the master it will keep refusing connections by other slaves. */ - while (listLength(server.slaves)) { - ln = listFirst(server.slaves); - freeClient((redisClient*)ln->value); - } + server.repl_state = REDIS_REPL_CONNECT; + server.repl_down_since = server.unixtime; + /* We lost connection with our master, force our slaves to resync + * with us as well to load the new data set. + * + * If server.masterhost is NULL the user called SLAVEOF NO ONE so + * slave resync is not needed. */ + if (server.masterhost != NULL) disconnectSlaves(); } + + /* If this client was scheduled for async freeing we need to remove it + * from the queue. */ + if (c->flags & REDIS_CLOSE_ASAP) { + ln = listSearchKey(server.clients_to_close,c); + redisAssert(ln != NULL); + listDelNode(server.clients_to_close,ln); + } + /* Release memory */ zfree(c->argv); - zfree(c->mbargv); freeClientMultiState(c); zfree(c); } +/* Schedule a client to free it at a safe time in the serverCron() function. + * This function is useful when we need to terminate a client but we are in + * a context where calling freeClient() is not possible, because the client + * should be valid for the continuation of the flow of the program. */ +void freeClientAsync(redisClient *c) { + if (c->flags & REDIS_CLOSE_ASAP) return; + c->flags |= REDIS_CLOSE_ASAP; + listAddNodeTail(server.clients_to_close,c); +} + +void freeClientsInAsyncFreeQueue(void) { + while (listLength(server.clients_to_close)) { + listNode *ln = listFirst(server.clients_to_close); + redisClient *c = listNodeValue(ln); + + c->flags &= ~REDIS_CLOSE_ASAP; + freeClient(c); + listDelNode(server.clients_to_close,ln); + } +} + void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen; + size_t objmem; robj *o; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); - /* Use writev() if we have enough buffers to send */ - if (!server.glueoutputbuf && - listLength(c->reply) > REDIS_WRITEV_THRESHOLD && - !(c->flags & REDIS_MASTER)) - { - sendReplyToClientWritev(el, fd, privdata, mask); - return; - } - while(c->bufpos > 0 || listLength(c->reply)) { if (c->bufpos > 0) { if (c->flags & REDIS_MASTER) { @@ -454,6 +691,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { } else { o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); + objmem = zmalloc_size_sds(o->ptr); if (objlen == 0) { listDelNode(c->reply,listFirst(c->reply)); @@ -474,14 +712,20 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { if (c->sentlen == objlen) { listDelNode(c->reply,listFirst(c->reply)); c->sentlen = 0; + c->reply_bytes -= objmem; } } - /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT + /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT * bytes, in a single threaded server it's a good idea to serve * other clients as well, even if a very large request comes from * super fast link that is always able to accept data (in real world - * scenario think about 'KEYS *' against the loopback interfae) */ - if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break; + * scenario think about 'KEYS *' against the loopback interface). + * + * However if we are over the maxmemory limit we ignore that and + * just deliver as much data as it is possible to deliver. */ + if (totwritten > REDIS_MAX_WRITE_PER_EVENT && + (server.maxmemory == 0 || + zmalloc_used_memory() < server.maxmemory)) break; } if (nwritten == -1) { if (errno == EAGAIN) { @@ -493,216 +737,276 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } } - if (totwritten > 0) c->lastinteraction = time(NULL); - if (listLength(c->reply) == 0) { + if (totwritten > 0) c->lastinteraction = server.unixtime; + if (c->bufpos == 0 && listLength(c->reply) == 0) { c->sentlen = 0; aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); + + /* Close connection after entire reply has been sent. */ + if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c); } } -void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask) -{ - redisClient *c = privdata; - int nwritten = 0, totwritten = 0, objlen, willwrite; - robj *o; - struct iovec iov[REDIS_WRITEV_IOVEC_COUNT]; - int offset, ion = 0; - REDIS_NOTUSED(el); - REDIS_NOTUSED(mask); +/* resetClient prepare the client to process the next command */ +void resetClient(redisClient *c) { + freeClientArgv(c); + c->reqtype = 0; + c->multibulklen = 0; + c->bulklen = -1; + /* We clear the ASKING flag as well if we are not inside a MULTI. */ + if (!(c->flags & REDIS_MULTI)) c->flags &= (~REDIS_ASKING); +} - listNode *node; - while (listLength(c->reply)) { - offset = c->sentlen; - ion = 0; - willwrite = 0; +int processInlineBuffer(redisClient *c) { + char *newline = strstr(c->querybuf,"\r\n"); + int argc, j; + sds *argv; + size_t querylen; + + /* Nothing to do without a \r\n */ + if (newline == NULL) { + if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) { + addReplyError(c,"Protocol error: too big inline request"); + setProtocolError(c,0); + } + return REDIS_ERR; + } - /* fill-in the iov[] array */ - for(node = listFirst(c->reply); node; node = listNextNode(node)) { - o = listNodeValue(node); - objlen = sdslen(o->ptr); + /* Split the input buffer up to the \r\n */ + querylen = newline-(c->querybuf); + argv = sdssplitlen(c->querybuf,querylen," ",1,&argc); - if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT) - break; + /* Leave data after the first line of the query in the buffer */ + c->querybuf = sdsrange(c->querybuf,querylen+2,-1); - if(ion == REDIS_WRITEV_IOVEC_COUNT) - break; /* no more iovecs */ + /* Setup argv array on client structure */ + if (c->argv) zfree(c->argv); + c->argv = zmalloc(sizeof(robj*)*argc); - iov[ion].iov_base = ((char*)o->ptr) + offset; - iov[ion].iov_len = objlen - offset; - willwrite += objlen - offset; - offset = 0; /* just for the first item */ - ion++; + /* Create redis objects for all arguments. */ + for (c->argc = 0, j = 0; j < argc; j++) { + if (sdslen(argv[j])) { + c->argv[c->argc] = createObject(REDIS_STRING,argv[j]); + c->argc++; + } else { + sdsfree(argv[j]); } + } + zfree(argv); + return REDIS_OK; +} - if(willwrite == 0) - break; +/* Helper function. Trims query buffer to make the function that processes + * multi bulk requests idempotent. */ +static void setProtocolError(redisClient *c, int pos) { + if (server.verbosity >= REDIS_VERBOSE) { + sds client = getClientInfoString(c); + redisLog(REDIS_VERBOSE, + "Protocol error from client: %s", client); + sdsfree(client); + } + c->flags |= REDIS_CLOSE_AFTER_REPLY; + c->querybuf = sdsrange(c->querybuf,pos,-1); +} - /* write all collected blocks at once */ - if((nwritten = writev(fd, iov, ion)) < 0) { - if (errno != EAGAIN) { - redisLog(REDIS_VERBOSE, - "Error writing to client: %s", strerror(errno)); - freeClient(c); - return; +int processMultibulkBuffer(redisClient *c) { + char *newline = NULL; + int pos = 0, ok; + long long ll; + + if (c->multibulklen == 0) { + /* The client should have been reset */ + redisAssertWithInfo(c,NULL,c->argc == 0); + + /* Multi bulk length cannot be read without a \r\n */ + newline = strchr(c->querybuf,'\r'); + if (newline == NULL) { + if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) { + addReplyError(c,"Protocol error: too big mbulk count string"); + setProtocolError(c,0); } - break; + return REDIS_ERR; } - totwritten += nwritten; - offset = c->sentlen; + /* Buffer should also contain \n */ + if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2)) + return REDIS_ERR; + + /* We know for sure there is a whole line since newline != NULL, + * so go ahead and find out the multi bulk length. */ + redisAssertWithInfo(c,NULL,c->querybuf[0] == '*'); + ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll); + if (!ok || ll > 1024*1024) { + addReplyError(c,"Protocol error: invalid multibulk length"); + setProtocolError(c,pos); + return REDIS_ERR; + } - /* remove written robjs from c->reply */ - while (nwritten && listLength(c->reply)) { - o = listNodeValue(listFirst(c->reply)); - objlen = sdslen(o->ptr); + pos = (newline-c->querybuf)+2; + if (ll <= 0) { + c->querybuf = sdsrange(c->querybuf,pos,-1); + return REDIS_OK; + } - if(nwritten >= objlen - offset) { - listDelNode(c->reply, listFirst(c->reply)); - nwritten -= objlen - offset; - c->sentlen = 0; - } else { - /* partial write */ - c->sentlen += nwritten; + c->multibulklen = ll; + + /* Setup argv array on client structure */ + if (c->argv) zfree(c->argv); + c->argv = zmalloc(sizeof(robj*)*c->multibulklen); + } + + redisAssertWithInfo(c,NULL,c->multibulklen > 0); + while(c->multibulklen) { + /* Read bulk length if unknown */ + if (c->bulklen == -1) { + newline = strchr(c->querybuf+pos,'\r'); + if (newline == NULL) { + if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) { + addReplyError(c,"Protocol error: too big bulk count string"); + setProtocolError(c,0); + } break; } - offset = 0; - } - } - if (totwritten > 0) - c->lastinteraction = time(NULL); + /* Buffer should also contain \n */ + if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2)) + break; - if (listLength(c->reply) == 0) { - c->sentlen = 0; - aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); - } -} + if (c->querybuf[pos] != '$') { + addReplyErrorFormat(c, + "Protocol error: expected '$', got '%c'", + c->querybuf[pos]); + setProtocolError(c,pos); + return REDIS_ERR; + } -/* resetClient prepare the client to process the next command */ -void resetClient(redisClient *c) { - freeClientArgv(c); - c->bulklen = -1; - c->multibulk = 0; -} + ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll); + if (!ok || ll < 0 || ll > 512*1024*1024) { + addReplyError(c,"Protocol error: invalid bulk length"); + setProtocolError(c,pos); + return REDIS_ERR; + } -void closeTimedoutClients(void) { - redisClient *c; - listNode *ln; - time_t now = time(NULL); - listIter li; + pos += newline-(c->querybuf+pos)+2; + if (ll >= REDIS_MBULK_BIG_ARG) { + /* If we are going to read a large object from network + * try to make it likely that it will start at c->querybuf + * boundary so that we can optimized object creation + * avoiding a large copy of data. */ + c->querybuf = sdsrange(c->querybuf,pos,-1); + pos = 0; + /* Hint the sds library about the amount of bytes this string is + * going to contain. */ + c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2); + } + c->bulklen = ll; + } - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - c = listNodeValue(ln); - if (server.maxidletime && - !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ - !(c->flags & REDIS_MASTER) && /* no timeout for masters */ - !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ - dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ - listLength(c->pubsub_patterns) == 0 && - (now - c->lastinteraction > server.maxidletime)) - { - redisLog(REDIS_VERBOSE,"Closing idle client"); - freeClient(c); - } else if (c->flags & REDIS_BLOCKED) { - if (c->blockingto != 0 && c->blockingto < now) { - addReply(c,shared.nullmultibulk); - unblockClientWaitingData(c); + /* Read bulk argument */ + if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) { + /* Not enough data (+2 == trailing \r\n) */ + break; + } else { + /* Optimization: if the buffer contanins JUST our bulk element + * instead of creating a new object by *copying* the sds we + * just use the current sds string. */ + if (pos == 0 && + c->bulklen >= REDIS_MBULK_BIG_ARG && + (signed) sdslen(c->querybuf) == c->bulklen+2) + { + c->argv[c->argc++] = createObject(REDIS_STRING,c->querybuf); + sdsIncrLen(c->querybuf,-2); /* remove CRLF */ + c->querybuf = sdsempty(); + /* Assume that if we saw a fat argument we'll see another one + * likely... */ + c->querybuf = sdsMakeRoomFor(c->querybuf,c->bulklen+2); + pos = 0; + } else { + c->argv[c->argc++] = + createStringObject(c->querybuf+pos,c->bulklen); + pos += c->bulklen+2; } + c->bulklen = -1; + c->multibulklen--; } } -} -void processInputBuffer(redisClient *c) { -again: - /* Before to process the input buffer, make sure the client is not - * waitig for a blocking operation such as BLPOP. Note that the first - * iteration the client is never blocked, otherwise the processInputBuffer - * would not be called at all, but after the execution of the first commands - * in the input buffer the client may be blocked, and the "goto again" - * will try to reiterate. The following line will make it return asap. */ - if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return; - if (c->bulklen == -1) { - /* Read the first line of the query */ - char *p = strchr(c->querybuf,'\n'); - size_t querylen; - - if (p) { - sds query, *argv; - int argc, j; - - query = c->querybuf; - c->querybuf = sdsempty(); - querylen = 1+(p-(query)); - if (sdslen(query) > querylen) { - /* leave data after the first line of the query in the buffer */ - c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen); - } - *p = '\0'; /* remove "\n" */ - if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */ - sdsupdatelen(query); + /* Trim to pos */ + if (pos) c->querybuf = sdsrange(c->querybuf,pos,-1); - /* Now we can split the query in arguments */ - argv = sdssplitlen(query,sdslen(query)," ",1,&argc); - sdsfree(query); + /* We're done when c->multibulk == 0 */ + if (c->multibulklen == 0) return REDIS_OK; - if (c->argv) zfree(c->argv); - c->argv = zmalloc(sizeof(robj*)*argc); + /* Still not read to process the command */ + return REDIS_ERR; +} - for (j = 0; j < argc; j++) { - if (sdslen(argv[j])) { - c->argv[c->argc] = createObject(REDIS_STRING,argv[j]); - c->argc++; - } else { - sdsfree(argv[j]); - } - } - zfree(argv); - if (c->argc) { - /* Execute the command. If the client is still valid - * after processCommand() return and there is something - * on the query buffer try to process the next command. */ - if (processCommand(c) && sdslen(c->querybuf)) goto again; +void processInputBuffer(redisClient *c) { + /* Keep processing while there is something in the input buffer */ + while(sdslen(c->querybuf)) { + /* Immediately abort if the client is in the middle of something. */ + if (c->flags & REDIS_BLOCKED) return; + + /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is + * written to the client. Make sure to not let the reply grow after + * this flag has been set (i.e. don't process more commands). */ + if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; + + /* Determine request type when unknown. */ + if (!c->reqtype) { + if (c->querybuf[0] == '*') { + c->reqtype = REDIS_REQ_MULTIBULK; } else { - /* Nothing to process, argc == 0. Just process the query - * buffer if it's not empty or return to the caller */ - if (sdslen(c->querybuf)) goto again; + c->reqtype = REDIS_REQ_INLINE; } - return; - } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) { - redisLog(REDIS_VERBOSE, "Client protocol error"); - freeClient(c); - return; } - } else { - /* Bulk read handling. Note that if we are at this point - the client already sent a command terminated with a newline, - we are reading the bulk data that is actually the last - argument of the command. */ - int qbl = sdslen(c->querybuf); - - if (c->bulklen <= qbl) { - /* Copy everything but the final CRLF as final argument */ - c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2); - c->argc++; - c->querybuf = sdsrange(c->querybuf,c->bulklen,-1); - /* Process the command. If the client is still valid after - * the processing and there is more data in the buffer - * try to parse it. */ - if (processCommand(c) && sdslen(c->querybuf)) goto again; - return; + + if (c->reqtype == REDIS_REQ_INLINE) { + if (processInlineBuffer(c) != REDIS_OK) break; + } else if (c->reqtype == REDIS_REQ_MULTIBULK) { + if (processMultibulkBuffer(c) != REDIS_OK) break; + } else { + redisPanic("Unknown request type"); + } + + /* Multibulk processing could see a <= 0 length. */ + if (c->argc == 0) { + resetClient(c); + } else { + /* Only reset the client when the command was executed. */ + if (processCommand(c) == REDIS_OK) + resetClient(c); } } } void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = (redisClient*) privdata; - char buf[REDIS_IOBUF_LEN]; - int nread; + int nread, readlen; + size_t qblen; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); - nread = read(fd, buf, REDIS_IOBUF_LEN); + server.current_client = c; + readlen = REDIS_IOBUF_LEN; + /* If this is a multi bulk request, and we are processing a bulk reply + * that is large enough, try to maximize the probabilty that the query + * buffer contains excatly the SDS string representing the object, even + * at the risk of requring more read(2) calls. This way the function + * processMultiBulkBuffer() can avoid copying buffers to create the + * Redis Object representing the argument. */ + if (c->reqtype == REDIS_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1 + && c->bulklen >= REDIS_MBULK_BIG_ARG) + { + int remaining = (unsigned)(c->bulklen+2)-sdslen(c->querybuf); + + if (remaining < readlen) readlen = remaining; + } + + qblen = sdslen(c->querybuf); + if (c->querybuf_peak < qblen) c->querybuf_peak = qblen; + c->querybuf = sdsMakeRoomFor(c->querybuf, readlen); + nread = read(fd, c->querybuf+qblen, readlen); if (nread == -1) { if (errno == EAGAIN) { nread = 0; @@ -717,10 +1021,319 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } if (nread) { - c->querybuf = sdscatlen(c->querybuf, buf, nread); - c->lastinteraction = time(NULL); + sdsIncrLen(c->querybuf,nread); + c->lastinteraction = server.unixtime; } else { + server.current_client = NULL; + return; + } + if (sdslen(c->querybuf) > server.client_max_querybuf_len) { + sds ci = getClientInfoString(c), bytes = sdsempty(); + + bytes = sdscatrepr(bytes,c->querybuf,64); + redisLog(REDIS_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); + sdsfree(ci); + sdsfree(bytes); + freeClient(c); return; } processInputBuffer(c); + server.current_client = NULL; +} + +void getClientsMaxBuffers(unsigned long *longest_output_list, + unsigned long *biggest_input_buffer) { + redisClient *c; + listNode *ln; + listIter li; + unsigned long lol = 0, bib = 0; + + listRewind(server.clients,&li); + while ((ln = listNext(&li)) != NULL) { + c = listNodeValue(ln); + + if (listLength(c->reply) > lol) lol = listLength(c->reply); + if (sdslen(c->querybuf) > bib) bib = sdslen(c->querybuf); + } + *longest_output_list = lol; + *biggest_input_buffer = bib; +} + +/* Turn a Redis client into an sds string representing its state. */ +sds getClientInfoString(redisClient *client) { + char ip[32], flags[16], events[3], *p; + int port; + int emask; + + anetPeerToString(client->fd,ip,&port); + p = flags; + if (client->flags & REDIS_SLAVE) { + if (client->flags & REDIS_MONITOR) + *p++ = 'O'; + else + *p++ = 'S'; + } + if (client->flags & REDIS_MASTER) *p++ = 'M'; + if (client->flags & REDIS_MULTI) *p++ = 'x'; + if (client->flags & REDIS_BLOCKED) *p++ = 'b'; + if (client->flags & REDIS_DIRTY_CAS) *p++ = 'd'; + if (client->flags & REDIS_CLOSE_AFTER_REPLY) *p++ = 'c'; + if (client->flags & REDIS_UNBLOCKED) *p++ = 'u'; + if (client->flags & REDIS_CLOSE_ASAP) *p++ = 'A'; + if (p == flags) *p++ = 'N'; + *p++ = '\0'; + + emask = client->fd == -1 ? 0 : aeGetFileEvents(server.el,client->fd); + p = events; + if (emask & AE_READABLE) *p++ = 'r'; + if (emask & AE_WRITABLE) *p++ = 'w'; + *p = '\0'; + return sdscatprintf(sdsempty(), + "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d multi=%d qbuf=%lu qbuf-free=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", + ip,port,client->fd, + (long)(server.unixtime - client->ctime), + (long)(server.unixtime - client->lastinteraction), + flags, + client->db->id, + (int) dictSize(client->pubsub_channels), + (int) listLength(client->pubsub_patterns), + (client->flags & REDIS_MULTI) ? client->mstate.count : -1, + (unsigned long) sdslen(client->querybuf), + (unsigned long) sdsavail(client->querybuf), + (unsigned long) client->bufpos, + (unsigned long) listLength(client->reply), + getClientOutputBufferMemoryUsage(client), + events, + client->lastcmd ? client->lastcmd->name : "NULL"); +} + +sds getAllClientsInfoString(void) { + listNode *ln; + listIter li; + redisClient *client; + sds o = sdsempty(); + + listRewind(server.clients,&li); + while ((ln = listNext(&li)) != NULL) { + sds cs; + + client = listNodeValue(ln); + cs = getClientInfoString(client); + o = sdscatsds(o,cs); + sdsfree(cs); + o = sdscatlen(o,"\n",1); + } + return o; +} + +void clientCommand(redisClient *c) { + listNode *ln; + listIter li; + redisClient *client; + + if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) { + sds o = getAllClientsInfoString(); + addReplyBulkCBuffer(c,o,sdslen(o)); + sdsfree(o); + } else if (!strcasecmp(c->argv[1]->ptr,"kill") && c->argc == 3) { + listRewind(server.clients,&li); + while ((ln = listNext(&li)) != NULL) { + char ip[32], addr[64]; + int port; + + client = listNodeValue(ln); + if (anetPeerToString(client->fd,ip,&port) == -1) continue; + snprintf(addr,sizeof(addr),"%s:%d",ip,port); + if (strcmp(addr,c->argv[2]->ptr) == 0) { + addReply(c,shared.ok); + if (c == client) { + client->flags |= REDIS_CLOSE_AFTER_REPLY; + } else { + freeClient(client); + } + return; + } + } + addReplyError(c,"No such client"); + } else { + addReplyError(c, "Syntax error, try CLIENT (LIST | KILL ip:port)"); + } +} + +/* Rewrite the command vector of the client. All the new objects ref count + * is incremented. The old command vector is freed, and the old objects + * ref count is decremented. */ +void rewriteClientCommandVector(redisClient *c, int argc, ...) { + va_list ap; + int j; + robj **argv; /* The new argument vector */ + + argv = zmalloc(sizeof(robj*)*argc); + va_start(ap,argc); + for (j = 0; j < argc; j++) { + robj *a; + + a = va_arg(ap, robj*); + argv[j] = a; + incrRefCount(a); + } + /* We free the objects in the original vector at the end, so we are + * sure that if the same objects are reused in the new vector the + * refcount gets incremented before it gets decremented. */ + for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); + zfree(c->argv); + /* Replace argv and argc with our new versions. */ + c->argv = argv; + c->argc = argc; + c->cmd = lookupCommand(c->argv[0]->ptr); + redisAssertWithInfo(c,NULL,c->cmd != NULL); + va_end(ap); +} + +/* Rewrite a single item in the command vector. + * The new val ref count is incremented, and the old decremented. */ +void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) { + robj *oldval; + + redisAssertWithInfo(c,NULL,i < c->argc); + oldval = c->argv[i]; + c->argv[i] = newval; + incrRefCount(newval); + decrRefCount(oldval); + + /* If this is the command name make sure to fix c->cmd. */ + if (i == 0) { + c->cmd = lookupCommand(c->argv[0]->ptr); + redisAssertWithInfo(c,NULL,c->cmd != NULL); + } +} + +/* This function returns the number of bytes that Redis is virtually + * using to store the reply still not read by the client. + * It is "virtual" since the reply output list may contain objects that + * are shared and are not really using additional memory. + * + * The function returns the total sum of the length of all the objects + * stored in the output list, plus the memory used to allocate every + * list node. The static reply buffer is not taken into account since it + * is allocated anyway. + * + * Note: this function is very fast so can be called as many time as + * the caller wishes. The main usage of this function currently is + * enforcing the client output length limits. */ +unsigned long getClientOutputBufferMemoryUsage(redisClient *c) { + unsigned long list_item_size = sizeof(listNode)+sizeof(robj); + + return c->reply_bytes + (list_item_size*listLength(c->reply)); +} + +/* Get the class of a client, used in order to envorce limits to different + * classes of clients. + * + * The function will return one of the following: + * REDIS_CLIENT_LIMIT_CLASS_NORMAL -> Normal client + * REDIS_CLIENT_LIMIT_CLASS_SLAVE -> Slave or client executing MONITOR command + * REDIS_CLIENT_LIMIT_CLASS_PUBSUB -> Client subscribed to Pub/Sub channels + */ +int getClientLimitClass(redisClient *c) { + if (c->flags & REDIS_SLAVE) return REDIS_CLIENT_LIMIT_CLASS_SLAVE; + if (dictSize(c->pubsub_channels) || listLength(c->pubsub_patterns)) + return REDIS_CLIENT_LIMIT_CLASS_PUBSUB; + return REDIS_CLIENT_LIMIT_CLASS_NORMAL; +} + +int getClientLimitClassByName(char *name) { + if (!strcasecmp(name,"normal")) return REDIS_CLIENT_LIMIT_CLASS_NORMAL; + else if (!strcasecmp(name,"slave")) return REDIS_CLIENT_LIMIT_CLASS_SLAVE; + else if (!strcasecmp(name,"pubsub")) return REDIS_CLIENT_LIMIT_CLASS_PUBSUB; + else return -1; +} + +char *getClientLimitClassName(int class) { + switch(class) { + case REDIS_CLIENT_LIMIT_CLASS_NORMAL: return "normal"; + case REDIS_CLIENT_LIMIT_CLASS_SLAVE: return "slave"; + case REDIS_CLIENT_LIMIT_CLASS_PUBSUB: return "pubsub"; + default: return NULL; + } +} + +/* The function checks if the client reached output buffer soft or hard + * limit, and also update the state needed to check the soft limit as + * a side effect. + * + * Return value: non-zero if the client reached the soft or the hard limit. + * Otherwise zero is returned. */ +int checkClientOutputBufferLimits(redisClient *c) { + int soft = 0, hard = 0, class; + unsigned long used_mem = getClientOutputBufferMemoryUsage(c); + + class = getClientLimitClass(c); + if (server.client_obuf_limits[class].hard_limit_bytes && + used_mem >= server.client_obuf_limits[class].hard_limit_bytes) + hard = 1; + if (server.client_obuf_limits[class].soft_limit_bytes && + used_mem >= server.client_obuf_limits[class].soft_limit_bytes) + soft = 1; + + /* We need to check if the soft limit is reached continuously for the + * specified amount of seconds. */ + if (soft) { + if (c->obuf_soft_limit_reached_time == 0) { + c->obuf_soft_limit_reached_time = server.unixtime; + soft = 0; /* First time we see the soft limit reached */ + } else { + time_t elapsed = server.unixtime - c->obuf_soft_limit_reached_time; + + if (elapsed <= + server.client_obuf_limits[class].soft_limit_seconds) { + soft = 0; /* The client still did not reached the max number of + seconds for the soft limit to be considered + reached. */ + } + } + } else { + c->obuf_soft_limit_reached_time = 0; + } + return soft || hard; +} + +/* Asynchronously close a client if soft or hard limit is reached on the + * output buffer size. The caller can check if the client will be closed + * checking if the client REDIS_CLOSE_ASAP flag is set. + * + * Note: we need to close the client asynchronously because this function is + * called from contexts where the client can't be freed safely, i.e. from the + * lower level functions pushing data inside the client output buffers. */ +void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) { + redisAssert(c->reply_bytes < ULONG_MAX-(1024*64)); + if (c->reply_bytes == 0 || c->flags & REDIS_CLOSE_ASAP) return; + if (checkClientOutputBufferLimits(c)) { + sds client = getClientInfoString(c); + + freeClientAsync(c); + redisLog(REDIS_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client); + sdsfree(client); + } +} + +/* Helper function used by freeMemoryIfNeeded() in order to flush slaves + * output buffers without returning control to the event loop. */ +void flushSlavesOutputBuffers(void) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + int events; + + events = aeGetFileEvents(server.el,slave->fd); + if (events & AE_WRITABLE && + slave->replstate == REDIS_REPL_ONLINE && + listLength(slave->reply)) + { + sendReplyToClient(server.el,slave->fd,slave,0); + } + } }