X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/89f9f83769a2ad0b4aeb8907b82b453ace6ebe9d..a5062bbab0a1619523a9562cec2256de16149c29:/src/networking.c diff --git a/src/networking.c b/src/networking.c index 96ce5a99..6d232ecf 100644 --- a/src/networking.c +++ b/src/networking.c @@ -11,11 +11,7 @@ int listMatchObjects(void *a, void *b) { } redisClient *createClient(int fd) { - redisClient *c; - - /* Allocate more space to hold a static write buffer. */ - c = zmalloc(sizeof(redisClient)+REDIS_REPLY_CHUNK_BYTES); - c->buflen = REDIS_REPLY_CHUNK_BYTES; + redisClient *c = zmalloc(sizeof(redisClient)); c->bufpos = 0; anetNonBlock(NULL,fd); @@ -32,12 +28,11 @@ redisClient *createClient(int fd) { selectDb(c,0); c->fd = fd; c->querybuf = sdsempty(); + c->reqtype = 0; c->argc = 0; c->argv = NULL; + c->multibulklen = 0; c->bulklen = -1; - c->multibulk = 0; - c->mbargc = 0; - c->mbargv = NULL; c->sentlen = 0; c->flags = 0; c->lastinteraction = time(NULL); @@ -46,8 +41,10 @@ redisClient *createClient(int fd) { c->reply = listCreate(); listSetFreeMethod(c->reply,decrRefCount); listSetDupMethod(c->reply,dupClientReplyValue); - c->blocking_keys = NULL; - c->blocking_keys_num = 0; + c->bpop.keys = NULL; + c->bpop.count = 0; + c->bpop.timeout = 0; + c->bpop.target = NULL; c->io_keys = listCreate(); c->watched_keys = listCreate(); listSetFreeMethod(c->io_keys,decrRefCount); @@ -60,7 +57,12 @@ redisClient *createClient(int fd) { return c; } -int _ensureFileEvent(redisClient *c) { +/* Set the event loop to listen for write events on the client's socket. + * Typically gets called every time a reply is built. */ +int _installWriteEvent(redisClient *c) { + /* When CLOSE_AFTER_REPLY is set, no more replies may be added! */ + redisAssert(!(c->flags & REDIS_CLOSE_AFTER_REPLY)); + if (c->fd <= 0) return REDIS_ERR; if (c->bufpos == 0 && listLength(c->reply) == 0 && (c->replstate == REDIS_REPL_NONE || @@ -87,7 +89,7 @@ robj *dupLastObjectIfNeeded(list *reply) { } int _addReplyToBuffer(redisClient *c, char *s, size_t len) { - size_t available = c->buflen-c->bufpos; + size_t available = sizeof(c->buf)-c->bufpos; /* If there already are entries in the reply list, we cannot * add anything more to the static buffer. */ @@ -164,21 +166,31 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) { } void addReply(redisClient *c, robj *obj) { - if (_ensureFileEvent(c) != REDIS_OK) return; - if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) { - /* Returns a new object with refcount 1 */ - obj = dupStringObject(obj); + if (_installWriteEvent(c) != REDIS_OK) return; + + /* This is an important place where we can avoid copy-on-write + * when there is a saving child running, avoiding touching the + * refcount field of the object if it's not needed. + * + * If the encoding is RAW and there is room in the static buffer + * we'll be able to send the object to the client without + * messing with its page. */ + if (obj->encoding == REDIS_ENCODING_RAW) { + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); } else { - /* This increments the refcount. */ + /* FIXME: convert the long into string and use _addReplyToBuffer() + * instead of calling getDecodedObject. As this place in the + * code is too performance critical. */ obj = getDecodedObject(obj); + if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) + _addReplyObjectToList(c,obj); + decrRefCount(obj); } - if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) - _addReplyObjectToList(c,obj); - decrRefCount(obj); } void addReplySds(redisClient *c, sds s) { - if (_ensureFileEvent(c) != REDIS_OK) { + if (_installWriteEvent(c) != REDIS_OK) { /* The caller expects the sds to be free'd. */ sdsfree(s); return; @@ -192,7 +204,7 @@ void addReplySds(redisClient *c, sds s) { } void addReplyString(redisClient *c, char *s, size_t len) { - if (_ensureFileEvent(c) != REDIS_OK) return; + if (_installWriteEvent(c) != REDIS_OK) return; if (_addReplyToBuffer(c,s,len) != REDIS_OK) _addReplyStringToList(c,s,len); } @@ -238,7 +250,10 @@ void addReplyStatusFormat(redisClient *c, const char *fmt, ...) { /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addDeferredMultiBulkLength(redisClient *c) { - if (_ensureFileEvent(c) != REDIS_OK) return NULL; + /* Note that we install the write event here even if the object is not + * ready to be sent, since we are sure that before returning to the + * event loop setDeferredMultiBulkLength() will be called. */ + if (_installWriteEvent(c) != REDIS_OK) return NULL; listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL)); return listLast(c->reply); } @@ -264,6 +279,7 @@ void setDeferredMultiBulkLength(redisClient *c, void *node, long length) { } } +/* Add a duble as a bulk reply */ void addReplyDouble(redisClient *c, double d) { char dbuf[128], sbuf[128]; int dlen, slen; @@ -272,6 +288,8 @@ void addReplyDouble(redisClient *c, double d) { addReplyString(c,sbuf,slen); } +/* Add a long long as integer reply or bulk len / multi bulk count. + * Basically this is used to output . */ void _addReplyLongLong(redisClient *c, long long ll, char prefix) { char buf[128]; int len; @@ -290,6 +308,7 @@ void addReplyMultiBulkLen(redisClient *c, long length) { _addReplyLongLong(c,length,'*'); } +/* Create the length prefix of a bulk reply, example: $2234 */ void addReplyBulkLen(redisClient *c, robj *obj) { size_t len; @@ -311,40 +330,43 @@ void addReplyBulkLen(redisClient *c, robj *obj) { _addReplyLongLong(c,len,'$'); } +/* Add a Redis Object as a bulk reply */ void addReplyBulk(redisClient *c, robj *obj) { addReplyBulkLen(c,obj); addReply(c,obj); addReply(c,shared.crlf); } -/* In the CONFIG command we need to add vanilla C string as bulk replies */ +/* Add a C buffer as bulk reply */ +void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) { + _addReplyLongLong(c,len,'$'); + addReplyString(c,p,len); + addReply(c,shared.crlf); +} + +/* Add a C nul term string as bulk reply */ void addReplyBulkCString(redisClient *c, char *s) { if (s == NULL) { addReply(c,shared.nullbulk); } else { - robj *o = createStringObject(s,strlen(s)); - addReplyBulk(c,o); - decrRefCount(o); + addReplyBulkCBuffer(c,s,strlen(s)); } } -void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { - int cport, cfd; - char cip[128]; - redisClient *c; - REDIS_NOTUSED(el); - REDIS_NOTUSED(mask); - REDIS_NOTUSED(privdata); +/* Add a long long as a bulk reply */ +void addReplyBulkLongLong(redisClient *c, long long ll) { + char buf[64]; + int len; - cfd = anetAccept(server.neterr, fd, cip, &cport); - if (cfd == AE_ERR) { - redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr); - return; - } - redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport); - if ((c = createClient(cfd)) == NULL) { + len = ll2string(buf,64,ll); + addReplyBulkCBuffer(c,buf,len); +} + +static void acceptCommonHandler(int fd) { + redisClient *c; + if ((c = createClient(fd)) == NULL) { redisLog(REDIS_WARNING,"Error allocating resoures for the client"); - close(cfd); /* May be already closed, just ingore errors */ + close(fd); /* May be already closed, just ingore errors */ return; } /* If maxclient directive is set and this is one client more... close the @@ -364,15 +386,43 @@ void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { server.stat_numconnections++; } +void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + int cport, cfd; + char cip[128]; + REDIS_NOTUSED(el); + REDIS_NOTUSED(mask); + REDIS_NOTUSED(privdata); + + cfd = anetTcpAccept(server.neterr, fd, cip, &cport); + if (cfd == AE_ERR) { + redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr); + return; + } + redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport); + acceptCommonHandler(cfd); +} + +void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + int cfd; + REDIS_NOTUSED(el); + REDIS_NOTUSED(mask); + REDIS_NOTUSED(privdata); + + cfd = anetUnixAccept(server.neterr, fd); + if (cfd == AE_ERR) { + redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr); + return; + } + redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket); + acceptCommonHandler(cfd); +} + + static void freeClientArgv(redisClient *c) { int j; - for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); - for (j = 0; j < c->mbargc; j++) - decrRefCount(c->mbargv[j]); c->argc = 0; - c->mbargc = 0; } void freeClient(redisClient *c) { @@ -409,7 +459,7 @@ void freeClient(redisClient *c) { /* Remove from the list of clients waiting for swapped keys, or ready * to be restarted, but not yet woken up again. */ if (c->flags & REDIS_IO_WAIT) { - redisAssert(server.vm_enabled); + redisAssert(server.ds_enabled); if (listLength(c->io_keys) == 0) { ln = listSearchKey(server.io_ready_clients,c); @@ -423,7 +473,7 @@ void freeClient(redisClient *c) { dontWaitForSwappedKey(c,ln->value); } } - server.vm_blocked_clients--; + server.cache_blocked_clients--; } listRelease(c->io_keys); /* Master/slave cleanup. @@ -440,6 +490,7 @@ void freeClient(redisClient *c) { /* Case 2: we lost the connection with the master. */ if (c->flags & REDIS_MASTER) { server.master = NULL; + /* FIXME */ server.replstate = REDIS_REPL_CONNECT; /* Since we lost the connection with the master, we should also * close the connection with all our slaves if we have any, so @@ -453,7 +504,6 @@ void freeClient(redisClient *c) { } /* Release memory */ zfree(c->argv); - zfree(c->mbargv); freeClientMultiState(c); zfree(c); } @@ -465,15 +515,6 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { REDIS_NOTUSED(el); REDIS_NOTUSED(mask); - /* Use writev() if we have enough buffers to send */ - if (!server.glueoutputbuf && - listLength(c->reply) > REDIS_WRITEV_THRESHOLD && - !(c->flags & REDIS_MASTER)) - { - sendReplyToClientWritev(el, fd, privdata, mask); - return; - } - while(c->bufpos > 0 || listLength(c->reply)) { if (c->bufpos > 0) { if (c->flags & REDIS_MASTER) { @@ -538,92 +579,18 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) { if (listLength(c->reply) == 0) { c->sentlen = 0; aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); - } -} - -void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask) -{ - redisClient *c = privdata; - int nwritten = 0, totwritten = 0, objlen, willwrite; - robj *o; - struct iovec iov[REDIS_WRITEV_IOVEC_COUNT]; - int offset, ion = 0; - REDIS_NOTUSED(el); - REDIS_NOTUSED(mask); - - listNode *node; - while (listLength(c->reply)) { - offset = c->sentlen; - ion = 0; - willwrite = 0; - /* fill-in the iov[] array */ - for(node = listFirst(c->reply); node; node = listNextNode(node)) { - o = listNodeValue(node); - objlen = sdslen(o->ptr); - - if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT) - break; - - if(ion == REDIS_WRITEV_IOVEC_COUNT) - break; /* no more iovecs */ - - iov[ion].iov_base = ((char*)o->ptr) + offset; - iov[ion].iov_len = objlen - offset; - willwrite += objlen - offset; - offset = 0; /* just for the first item */ - ion++; - } - - if(willwrite == 0) - break; - - /* write all collected blocks at once */ - if((nwritten = writev(fd, iov, ion)) < 0) { - if (errno != EAGAIN) { - redisLog(REDIS_VERBOSE, - "Error writing to client: %s", strerror(errno)); - freeClient(c); - return; - } - break; - } - - totwritten += nwritten; - offset = c->sentlen; - - /* remove written robjs from c->reply */ - while (nwritten && listLength(c->reply)) { - o = listNodeValue(listFirst(c->reply)); - objlen = sdslen(o->ptr); - - if(nwritten >= objlen - offset) { - listDelNode(c->reply, listFirst(c->reply)); - nwritten -= objlen - offset; - c->sentlen = 0; - } else { - /* partial write */ - c->sentlen += nwritten; - break; - } - offset = 0; - } - } - - if (totwritten > 0) - c->lastinteraction = time(NULL); - - if (listLength(c->reply) == 0) { - c->sentlen = 0; - aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); + /* Close connection after entire reply has been sent. */ + if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c); } } /* resetClient prepare the client to process the next command */ void resetClient(redisClient *c) { freeClientArgv(c); + c->reqtype = 0; + c->multibulklen = 0; c->bulklen = -1; - c->multibulk = 0; } void closeTimedoutClients(void) { @@ -646,7 +613,7 @@ void closeTimedoutClients(void) { redisLog(REDIS_VERBOSE,"Closing idle client"); freeClient(c); } else if (c->flags & REDIS_BLOCKED) { - if (c->blockingto != 0 && c->blockingto < now) { + if (c->bpop.timeout != 0 && c->bpop.timeout < now) { addReply(c,shared.nullmultibulk); unblockClientWaitingData(c); } @@ -654,84 +621,172 @@ void closeTimedoutClients(void) { } } -void processInputBuffer(redisClient *c) { -again: - /* Before to process the input buffer, make sure the client is not - * waitig for a blocking operation such as BLPOP. Note that the first - * iteration the client is never blocked, otherwise the processInputBuffer - * would not be called at all, but after the execution of the first commands - * in the input buffer the client may be blocked, and the "goto again" - * will try to reiterate. The following line will make it return asap. */ - if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return; - if (c->bulklen == -1) { - /* Read the first line of the query */ - char *p = strchr(c->querybuf,'\n'); - size_t querylen; - - if (p) { - sds query, *argv; - int argc, j; - - query = c->querybuf; - c->querybuf = sdsempty(); - querylen = 1+(p-(query)); - if (sdslen(query) > querylen) { - /* leave data after the first line of the query in the buffer */ - c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen); - } - *p = '\0'; /* remove "\n" */ - if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */ - sdsupdatelen(query); - - /* Now we can split the query in arguments */ - argv = sdssplitlen(query,sdslen(query)," ",1,&argc); - sdsfree(query); - - if (c->argv) zfree(c->argv); - c->argv = zmalloc(sizeof(robj*)*argc); - - for (j = 0; j < argc; j++) { - if (sdslen(argv[j])) { - c->argv[c->argc] = createObject(REDIS_STRING,argv[j]); - c->argc++; - } else { - sdsfree(argv[j]); +int processInlineBuffer(redisClient *c) { + char *newline = strstr(c->querybuf,"\r\n"); + int argc, j; + sds *argv; + size_t querylen; + + /* Nothing to do without a \r\n */ + if (newline == NULL) + return REDIS_ERR; + + /* Split the input buffer up to the \r\n */ + querylen = newline-(c->querybuf); + argv = sdssplitlen(c->querybuf,querylen," ",1,&argc); + + /* Leave data after the first line of the query in the buffer */ + c->querybuf = sdsrange(c->querybuf,querylen+2,-1); + + /* Setup argv array on client structure */ + if (c->argv) zfree(c->argv); + c->argv = zmalloc(sizeof(robj*)*argc); + + /* Create redis objects for all arguments. */ + for (c->argc = 0, j = 0; j < argc; j++) { + if (sdslen(argv[j])) { + c->argv[c->argc] = createObject(REDIS_STRING,argv[j]); + c->argc++; + } else { + sdsfree(argv[j]); + } + } + zfree(argv); + return REDIS_OK; +} + +/* Helper function. Trims query buffer to make the function that processes + * multi bulk requests idempotent. */ +static void setProtocolError(redisClient *c, int pos) { + c->flags |= REDIS_CLOSE_AFTER_REPLY; + c->querybuf = sdsrange(c->querybuf,pos,-1); +} + +int processMultibulkBuffer(redisClient *c) { + char *newline = NULL; + char *eptr; + int pos = 0, tolerr; + long bulklen; + + if (c->multibulklen == 0) { + /* The client should have been reset */ + redisAssert(c->argc == 0); + + /* Multi bulk length cannot be read without a \r\n */ + newline = strstr(c->querybuf,"\r\n"); + if (newline == NULL) + return REDIS_ERR; + + /* We know for sure there is a whole line since newline != NULL, + * so go ahead and find out the multi bulk length. */ + redisAssert(c->querybuf[0] == '*'); + c->multibulklen = strtol(c->querybuf+1,&eptr,10); + pos = (newline-c->querybuf)+2; + if (c->multibulklen <= 0) { + c->querybuf = sdsrange(c->querybuf,pos,-1); + return REDIS_OK; + } else if (c->multibulklen > 1024*1024) { + addReplyError(c,"Protocol error: invalid multibulk length"); + setProtocolError(c,pos); + return REDIS_ERR; + } + + /* Setup argv array on client structure */ + if (c->argv) zfree(c->argv); + c->argv = zmalloc(sizeof(robj*)*c->multibulklen); + + /* Search new newline */ + newline = strstr(c->querybuf+pos,"\r\n"); + } + + redisAssert(c->multibulklen > 0); + while(c->multibulklen) { + /* Read bulk length if unknown */ + if (c->bulklen == -1) { + newline = strstr(c->querybuf+pos,"\r\n"); + if (newline != NULL) { + if (c->querybuf[pos] != '$') { + addReplyErrorFormat(c, + "Protocol error: expected '$', got '%c'", + c->querybuf[pos]); + setProtocolError(c,pos); + return REDIS_ERR; + } + + bulklen = strtol(c->querybuf+pos+1,&eptr,10); + tolerr = (eptr[0] != '\r'); + if (tolerr || bulklen == LONG_MIN || bulklen == LONG_MAX || + bulklen < 0 || bulklen > 512*1024*1024) + { + addReplyError(c,"Protocol error: invalid bulk length"); + setProtocolError(c,pos); + return REDIS_ERR; } + pos += eptr-(c->querybuf+pos)+2; + c->bulklen = bulklen; + } else { + /* No newline in current buffer, so wait for more data */ + break; } - zfree(argv); - if (c->argc) { - /* Execute the command. If the client is still valid - * after processCommand() return and there is something - * on the query buffer try to process the next command. */ - if (processCommand(c) && sdslen(c->querybuf)) goto again; + } + + /* Read bulk argument */ + if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) { + /* Not enough data (+2 == trailing \r\n) */ + break; + } else { + c->argv[c->argc++] = createStringObject(c->querybuf+pos,c->bulklen); + pos += c->bulklen+2; + c->bulklen = -1; + c->multibulklen--; + } + } + + /* Trim to pos */ + c->querybuf = sdsrange(c->querybuf,pos,-1); + + /* We're done when c->multibulk == 0 */ + if (c->multibulklen == 0) { + return REDIS_OK; + } + return REDIS_ERR; +} + +void processInputBuffer(redisClient *c) { + /* Keep processing while there is something in the input buffer */ + while(sdslen(c->querybuf)) { + /* Immediately abort if the client is in the middle of something. */ + if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return; + + /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is + * written to the client. Make sure to not let the reply grow after + * this flag has been set (i.e. don't process more commands). */ + if (c->flags & REDIS_CLOSE_AFTER_REPLY) return; + + /* Determine request type when unknown. */ + if (!c->reqtype) { + if (c->querybuf[0] == '*') { + c->reqtype = REDIS_REQ_MULTIBULK; } else { - /* Nothing to process, argc == 0. Just process the query - * buffer if it's not empty or return to the caller */ - if (sdslen(c->querybuf)) goto again; + c->reqtype = REDIS_REQ_INLINE; } - return; - } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) { - redisLog(REDIS_VERBOSE, "Client protocol error"); - freeClient(c); - return; } - } else { - /* Bulk read handling. Note that if we are at this point - the client already sent a command terminated with a newline, - we are reading the bulk data that is actually the last - argument of the command. */ - int qbl = sdslen(c->querybuf); - - if (c->bulklen <= qbl) { - /* Copy everything but the final CRLF as final argument */ - c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2); - c->argc++; - c->querybuf = sdsrange(c->querybuf,c->bulklen,-1); - /* Process the command. If the client is still valid after - * the processing and there is more data in the buffer - * try to parse it. */ - if (processCommand(c) && sdslen(c->querybuf)) goto again; - return; + + if (c->reqtype == REDIS_REQ_INLINE) { + if (processInlineBuffer(c) != REDIS_OK) break; + } else if (c->reqtype == REDIS_REQ_MULTIBULK) { + if (processMultibulkBuffer(c) != REDIS_OK) break; + } else { + redisPanic("Unknown request type"); + } + + /* Multibulk processing could see a <= 0 length. */ + if (c->argc == 0) { + resetClient(c); + } else { + /* Only reset the client when the command was executed. */ + if (processCommand(c) == REDIS_OK) + resetClient(c); } } } @@ -758,7 +813,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } if (nread) { - c->querybuf = sdscatlen(c->querybuf, buf, nread); + c->querybuf = sdscatlen(c->querybuf,buf,nread); c->lastinteraction = time(NULL); } else { return;