X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/7eac2a75a43bace72947fb534db9016707af1a02..dbb27a0a90ca3800f5be1d8170e404b9e7b9bc44:/src/networking.c diff --git a/src/networking.c b/src/networking.c index 6c3e7d81..d8fb8132 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3,6 +3,14 @@ static void setProtocolError(redisClient *c, int pos); +/* To evaluate the output buffer size of a client we need to get size of + * allocated objects, however we can't used zmalloc_size() directly on sds + * strings because of the trick they use to work (the header is before the + * returned pointer), so we use this helper function. */ +size_t zmalloc_size_sds(sds s) { + return zmalloc_size(s-sizeof(struct sdshdr)); +} + void *dupClientReplyValue(void *o) { incrRefCount((robj*)o); return o; @@ -67,11 +75,23 @@ redisClient *createClient(int fd) { return c; } -/* Set the event loop to listen for write events on the client's socket. - * Typically gets called every time a reply is built. */ -int _installWriteEvent(redisClient *c) { +/* This function is called every time we are going to transmit new data + * to the client. The behavior is the following: + * + * If the client should receive new data (normal clients will) the function + * returns REDIS_OK, and make sure to install the write handler in our event + * loop so that when the socket is writable new data gets written. + * + * If the client should not receive new data, because it is a fake client + * or a slave, or because the setup of the write handler failed, the function + * returns REDIS_ERR. + * + * Typically gets called every time a reply is built, before adding more + * data to the clients output buffers. If the function returns REDIS_ERR no + * data should be appended to the output buffers. */ +int prepareClientToWrite(redisClient *c) { if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK; - if (c->fd <= 0) return REDIS_ERR; + if (c->fd <= 0) return REDIS_ERR; /* Fake client */ if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || c->replstate == REDIS_REPL_ONLINE) && @@ -125,6 +145,7 @@ void _addReplyObjectToList(redisClient *c, robj *o) { if (listLength(c->reply) == 0) { incrRefCount(o); listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -132,14 +153,16 @@ void _addReplyObjectToList(redisClient *c, robj *o) { if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr)); + c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { incrRefCount(o); listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } } - c->reply_bytes += sdslen(o->ptr); asyncCloseClientOnOutputBufferLimitReached(c); } @@ -153,9 +176,9 @@ void _addReplySdsToList(redisClient *c, sds s) { return; } - c->reply_bytes += sdslen(s); if (listLength(c->reply) == 0) { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + c->reply_bytes += zmalloc_size_sds(s); } else { tail = listNodeValue(listLast(c->reply)); @@ -163,11 +186,14 @@ void _addReplySdsToList(redisClient *c, sds s) { if (tail->ptr != NULL && sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,sdslen(s)); + c->reply_bytes += zmalloc_size_sds(tail->ptr); sdsfree(s); } else { listAddNodeTail(c->reply,createObject(REDIS_STRING,s)); + c->reply_bytes += zmalloc_size_sds(s); } } asyncCloseClientOnOutputBufferLimitReached(c); @@ -179,7 +205,10 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; if (listLength(c->reply) == 0) { - listAddNodeTail(c->reply,createStringObject(s,len)); + robj *o = createStringObject(s,len); + + listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } else { tail = listNodeValue(listLast(c->reply)); @@ -187,13 +216,17 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { if (tail->ptr != NULL && sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES) { + c->reply_bytes -= zmalloc_size_sds(tail->ptr); tail = dupLastObjectIfNeeded(c->reply); tail->ptr = sdscatlen(tail->ptr,s,len); + c->reply_bytes += zmalloc_size_sds(tail->ptr); } else { - listAddNodeTail(c->reply,createStringObject(s,len)); + robj *o = createStringObject(s,len); + + listAddNodeTail(c->reply,o); + c->reply_bytes += zmalloc_size_sds(o->ptr); } } - c->reply_bytes += len; asyncCloseClientOnOutputBufferLimitReached(c); } @@ -203,7 +236,7 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { * -------------------------------------------------------------------------- */ void addReply(redisClient *c, robj *obj) { - if (_installWriteEvent(c) != REDIS_OK) return; + if (prepareClientToWrite(c) != REDIS_OK) return; /* This is an important place where we can avoid copy-on-write * when there is a saving child running, avoiding touching the @@ -215,19 +248,31 @@ void addReply(redisClient *c, robj *obj) { if (obj->encoding == REDIS_ENCODING_RAW) { if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) _addReplyObjectToList(c,obj); - } else { - /* FIXME: convert the long into string and use _addReplyToBuffer() - * instead of calling getDecodedObject. As this place in the - * code is too performance critical. */ + } else if (obj->encoding == REDIS_ENCODING_INT) { + /* Optimization: if there is room in the static buffer for 32 bytes + * (more than the max chars a 64 bit integer can take as string) we + * avoid decoding the object and go for the lower level approach. */ + if (listLength(c->reply) == 0 && (sizeof(c->buf) - c->bufpos) >= 32) { + char buf[32]; + int len; + + len = ll2string(buf,sizeof(buf),(long)obj->ptr); + if (_addReplyToBuffer(c,buf,len) == REDIS_OK) + return; + /* else... continue with the normal code path, but should never + * happen actually since we verified there is room. */ + } obj = getDecodedObject(obj); if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) _addReplyObjectToList(c,obj); decrRefCount(obj); + } else { + redisPanic("Wrong obj->encoding in addReply()"); } } void addReplySds(redisClient *c, sds s) { - if (_installWriteEvent(c) != REDIS_OK) { + if (prepareClientToWrite(c) != REDIS_OK) { /* The caller expects the sds to be free'd. */ sdsfree(s); return; @@ -241,19 +286,19 @@ void addReplySds(redisClient *c, sds s) { } void addReplyString(redisClient *c, char *s, size_t len) { - if (_installWriteEvent(c) != REDIS_OK) return; + if (prepareClientToWrite(c) != REDIS_OK) return; if (_addReplyToBuffer(c,s,len) != REDIS_OK) _addReplyStringToList(c,s,len); } -void _addReplyError(redisClient *c, char *s, size_t len) { +void addReplyErrorLength(redisClient *c, char *s, size_t len) { addReplyString(c,"-ERR ",5); addReplyString(c,s,len); addReplyString(c,"\r\n",2); } void addReplyError(redisClient *c, char *err) { - _addReplyError(c,err,strlen(err)); + addReplyErrorLength(c,err,strlen(err)); } void addReplyErrorFormat(redisClient *c, const char *fmt, ...) { @@ -268,18 +313,18 @@ void addReplyErrorFormat(redisClient *c, const char *fmt, ...) { for (j = 0; j < l; j++) { if (s[j] == '\r' || s[j] == '\n') s[j] = ' '; } - _addReplyError(c,s,sdslen(s)); + addReplyErrorLength(c,s,sdslen(s)); sdsfree(s); } -void _addReplyStatus(redisClient *c, char *s, size_t len) { +void addReplyStatusLength(redisClient *c, char *s, size_t len) { addReplyString(c,"+",1); addReplyString(c,s,len); addReplyString(c,"\r\n",2); } void addReplyStatus(redisClient *c, char *status) { - _addReplyStatus(c,status,strlen(status)); + addReplyStatusLength(c,status,strlen(status)); } void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { @@ -287,7 +332,7 @@ void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { va_start(ap,fmt); sds s = sdscatvprintf(sdsempty(),fmt,ap); va_end(ap); - _addReplyStatus(c,s,sdslen(s)); + addReplyStatusLength(c,s,sdslen(s)); sdsfree(s); } @@ -297,7 +342,7 @@ void *addDeferredMultiBulkLength(redisClient *c) { /* Note that we install the write event here even if the object is not * ready to be sent, since we are sure that before returning to the * event loop setDeferredMultiBulkLength() will be called. */ - if (_installWriteEvent(c) != REDIS_OK) return NULL; + if (prepareClientToWrite(c) != REDIS_OK) return NULL; listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL)); return listLast(c->reply); } @@ -312,7 +357,7 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { len = listNodeValue(ln); len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length); - c->reply_bytes += sdslen(len->ptr); + c->reply_bytes += zmalloc_size_sds(len->ptr); if (ln->next != NULL) { next = listNodeValue(ln->next); @@ -336,9 +381,21 @@ void addReplyDouble(redisClient *c, double d) { /* Add a long long as integer reply or bulk len / multi bulk count. * Basically this is used to output . */ -void _addReplyLongLong(redisClient *c, long long ll, char prefix) { +void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) { char buf[128]; int len; + + /* Things like $3\r\n or *2\r\n are emitted very often by the protocol + * so we have a few shared objects to use if the integer is small + * like it is most of the times. */ + if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) { + addReply(c,shared.mbulkhdr[ll]); + return; + } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) { + addReply(c,shared.bulkhdr[ll]); + return; + } + buf[0] = prefix; len = ll2string(buf+1,sizeof(buf)-1,ll); buf[len+1] = '\r'; @@ -352,11 +409,11 @@ void addReplyLongLong(redisClient *c, long long ll) { else if (ll == 1) addReply(c,shared.cone); else - _addReplyLongLong(c,ll,':'); + addReplyLongLongWithPrefix(c,ll,':'); } void addReplyMultiBulkLen(redisClient *c, long length) { - _addReplyLongLong(c,length,'*'); + addReplyLongLongWithPrefix(c,length,'*'); } /* Create the length prefix of a bulk reply, example: $2234 */ @@ -378,7 +435,7 @@ void addReplyBulkLen(redisClient *c, robj *obj) { len++; } } - _addReplyLongLong(c,len,'$'); + addReplyLongLongWithPrefix(c,len,'$'); } /* Add a Redis Object as a bulk reply */ @@ -390,7 +447,7 @@ void addReplyBulk(redisClient *c, robj *obj) { /* Add a C buffer as bulk reply */ void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) { - _addReplyLongLong(c,len,'$'); + addReplyLongLongWithPrefix(c,len,'$'); addReplyString(c,p,len); addReply(c,shared.crlf); } @@ -602,6 +659,7 @@ void freeClientsInAsyncFreeQueue(void) { void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { redisClient *c = privdata; int nwritten = 0, totwritten = 0, objlen; + size_t objmem; robj *o; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); @@ -627,6 +685,7 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { } else { o = listNodeValue(listFirst(c->reply)); objlen = sdslen(o->ptr); + objmem = zmalloc_size_sds(o->ptr); if (objlen == 0) { listDelNode(c->reply,listFirst(c->reply)); @@ -647,15 +706,20 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { if (c->sentlen == objlen) { listDelNode(c->reply,listFirst(c->reply)); c->sentlen = 0; - c->reply_bytes -= objlen; + c->reply_bytes -= objmem; } } - /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT + /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT * bytes, in a single threaded server it's a good idea to serve * other clients as well, even if a very large request comes from * super fast link that is always able to accept data (in real world - * scenario think about 'KEYS *' against the loopback interfae) */ - if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break; + * scenario think about 'KEYS *' against the loopback interface). + * + * However if we are over the maxmemory limit we ignore that and + * just deliver as much data as it is possible to deliver. */ + if (totwritten > REDIS_MAX_WRITE_PER_EVENT && + (server.maxmemory == 0 || + zmalloc_used_memory() < server.maxmemory)) break; } if (nwritten == -1) { if (errno == EAGAIN) { @@ -1179,9 +1243,9 @@ void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) { * * Note: this function is very fast so can be called as many time as * the caller wishes. The main usage of this function currently is - * enforcing the client output lenght limits. */ + * enforcing the client output length limits. */ unsigned long getClientOutputBufferMemoryUsage(redisClient *c) { - unsigned long list_item_size = sizeof(listNode); + unsigned long list_item_size = sizeof(listNode)+sizeof(robj); return c->reply_bytes + (list_item_size*listLength(c->reply)); } @@ -1201,6 +1265,22 @@ int getClientLimitClass(redisClient *c) { return REDIS_CLIENT_LIMIT_CLASS_NORMAL; } +int getClientLimitClassByName(char *name) { + if (!strcasecmp(name,"normal")) return REDIS_CLIENT_LIMIT_CLASS_NORMAL; + else if (!strcasecmp(name,"slave")) return REDIS_CLIENT_LIMIT_CLASS_SLAVE; + else if (!strcasecmp(name,"pubsub")) return REDIS_CLIENT_LIMIT_CLASS_PUBSUB; + else return -1; +} + +char *getClientLimitClassName(int class) { + switch(class) { + case REDIS_CLIENT_LIMIT_CLASS_NORMAL: return "normal"; + case REDIS_CLIENT_LIMIT_CLASS_SLAVE: return "slave"; + case REDIS_CLIENT_LIMIT_CLASS_PUBSUB: return "pubsub"; + default: return NULL; + } +} + /* The function checks if the client reached output buffer soft or hard * limit, and also update the state needed to check the soft limit as * a side effect. @@ -1242,21 +1322,40 @@ int checkClientOutputBufferLimits(redisClient *c) { } /* Asynchronously close a client if soft or hard limit is reached on the - * output buffer size. If the client will be closed 1 is returend, otherwise 0 - * is returned. + * output buffer size. The caller can check if the client will be closed + * checking if the client REDIS_CLOSE_ASAP flag is set. * * Note: we need to close the client asynchronously because this function is * called from contexts where the client can't be freed safely, i.e. from the * lower level functions pushing data inside the client output buffers. */ -int asyncCloseClientOnOutputBufferLimitReached(redisClient *c) { +void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) { + if (c->reply_bytes == 0 || c->flags & REDIS_CLOSE_ASAP) return; if (checkClientOutputBufferLimits(c)) { sds client = getClientInfoString(c); freeClientAsync(c); - redisLog(REDIS_NOTICE,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits."); + redisLog(REDIS_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client); sdsfree(client); - return 1; - } else { - return 0; + } +} + +/* Helper function used by freeMemoryIfNeeded() in order to flush slaves + * output buffers without returning control to the event loop. */ +void flushSlavesOutputBuffers(void) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + int events; + + events = aeGetFileEvents(server.el,slave->fd); + if (events & AE_WRITABLE && + slave->replstate == REDIS_REPL_ONLINE && + listLength(slave->reply)) + { + sendReplyToClient(server.el,slave->fd,slave,0); + } } }