]> git.saurik.com Git - redis.git/blame - src/networking.c
serious performance enhancement of diskstore
[redis.git] / src / networking.c
CommitLineData
e2641e09 1#include "redis.h"
e2641e09 2#include <sys/uio.h>
3
4void *dupClientReplyValue(void *o) {
5 incrRefCount((robj*)o);
6 return o;
7}
8
/* List match callback: two list values match when equalStringObjects()
 * says so. */
int listMatchObjects(void *a, void *b) {
    int matched = equalStringObjects(a,b);

    return matched;
}
12
13redisClient *createClient(int fd) {
f3357792 14 redisClient *c = zmalloc(sizeof(redisClient));
834ef78e 15 c->bufpos = 0;
e2641e09 16
17 anetNonBlock(NULL,fd);
18 anetTcpNoDelay(NULL,fd);
19 if (!c) return NULL;
106bd87a
PN
20 if (aeCreateFileEvent(server.el,fd,AE_READABLE,
21 readQueryFromClient, c) == AE_ERR)
22 {
23 close(fd);
24 zfree(c);
25 return NULL;
26 }
27
e2641e09 28 selectDb(c,0);
29 c->fd = fd;
30 c->querybuf = sdsempty();
cd8788f2 31 c->reqtype = 0;
e2641e09 32 c->argc = 0;
33 c->argv = NULL;
cd8788f2 34 c->multibulklen = 0;
e2641e09 35 c->bulklen = -1;
e2641e09 36 c->sentlen = 0;
37 c->flags = 0;
38 c->lastinteraction = time(NULL);
39 c->authenticated = 0;
40 c->replstate = REDIS_REPL_NONE;
41 c->reply = listCreate();
42 listSetFreeMethod(c->reply,decrRefCount);
43 listSetDupMethod(c->reply,dupClientReplyValue);
e3c51c4b
DJMM
44 c->bpop.keys = NULL;
45 c->bpop.count = 0;
46 c->bpop.timeout = 0;
47 c->bpop.target = NULL;
e2641e09 48 c->io_keys = listCreate();
49 c->watched_keys = listCreate();
50 listSetFreeMethod(c->io_keys,decrRefCount);
51 c->pubsub_channels = dictCreate(&setDictType,NULL);
52 c->pubsub_patterns = listCreate();
53 listSetFreeMethod(c->pubsub_patterns,decrRefCount);
54 listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
e2641e09 55 listAddNodeTail(server.clients,c);
56 initClientMultiState(c);
57 return c;
58}
59
a3a323e0
PN
60/* Set the event loop to listen for write events on the client's socket.
61 * Typically gets called every time a reply is built. */
4c2e506a 62int _installWriteEvent(redisClient *c) {
a3a323e0
PN
63 /* When CLOSE_AFTER_REPLY is set, no more replies may be added! */
64 redisAssert(!(c->flags & REDIS_CLOSE_AFTER_REPLY));
65
57b07380 66 if (c->fd <= 0) return REDIS_ERR;
834ef78e 67 if (c->bufpos == 0 && listLength(c->reply) == 0 &&
e2641e09 68 (c->replstate == REDIS_REPL_NONE ||
69 c->replstate == REDIS_REPL_ONLINE) &&
70 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
834ef78e
PN
71 sendReplyToClient, c) == AE_ERR) return REDIS_ERR;
72 return REDIS_OK;
73}
74
36c19d03
PN
75/* Create a duplicate of the last object in the reply list when
76 * it is not exclusively owned by the reply list. */
77robj *dupLastObjectIfNeeded(list *reply) {
78 robj *new, *cur;
79 listNode *ln;
80 redisAssert(listLength(reply) > 0);
81 ln = listLast(reply);
82 cur = listNodeValue(ln);
83 if (cur->refcount > 1) {
84 new = dupStringObject(cur);
85 decrRefCount(cur);
86 listNodeValue(ln) = new;
87 }
88 return listNodeValue(ln);
834ef78e
PN
89}
90
36c19d03 91int _addReplyToBuffer(redisClient *c, char *s, size_t len) {
f3357792 92 size_t available = sizeof(c->buf)-c->bufpos;
36c19d03
PN
93
94 /* If there already are entries in the reply list, we cannot
95 * add anything more to the static buffer. */
96 if (listLength(c->reply) > 0) return REDIS_ERR;
97
98 /* Check that the buffer has enough space available for this string. */
99 if (len > available) return REDIS_ERR;
e2641e09 100
36c19d03
PN
101 memcpy(c->buf+c->bufpos,s,len);
102 c->bufpos+=len;
103 return REDIS_OK;
834ef78e
PN
104}
105
36c19d03
PN
106void _addReplyObjectToList(redisClient *c, robj *o) {
107 robj *tail;
108 if (listLength(c->reply) == 0) {
109 incrRefCount(o);
110 listAddNodeTail(c->reply,o);
111 } else {
112 tail = listNodeValue(listLast(c->reply));
113
114 /* Append to this object when possible. */
115 if (tail->ptr != NULL &&
116 sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES)
117 {
118 tail = dupLastObjectIfNeeded(c->reply);
119 tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr));
120 } else {
121 incrRefCount(o);
122 listAddNodeTail(c->reply,o);
123 }
124 }
125}
834ef78e 126
36c19d03
PN
127/* This method takes responsibility over the sds. When it is no longer
128 * needed it will be free'd, otherwise it ends up in a robj. */
129void _addReplySdsToList(redisClient *c, sds s) {
130 robj *tail;
131 if (listLength(c->reply) == 0) {
132 listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
133 } else {
134 tail = listNodeValue(listLast(c->reply));
135
136 /* Append to this object when possible. */
137 if (tail->ptr != NULL &&
138 sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES)
139 {
140 tail = dupLastObjectIfNeeded(c->reply);
141 tail->ptr = sdscatlen(tail->ptr,s,sdslen(s));
142 sdsfree(s);
834ef78e 143 } else {
36c19d03 144 listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
834ef78e 145 }
36c19d03
PN
146 }
147}
148
149void _addReplyStringToList(redisClient *c, char *s, size_t len) {
150 robj *tail;
151 if (listLength(c->reply) == 0) {
152 listAddNodeTail(c->reply,createStringObject(s,len));
834ef78e 153 } else {
36c19d03
PN
154 tail = listNodeValue(listLast(c->reply));
155
156 /* Append to this object when possible. */
157 if (tail->ptr != NULL &&
158 sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
159 {
160 tail = dupLastObjectIfNeeded(c->reply);
161 tail->ptr = sdscatlen(tail->ptr,s,len);
834ef78e 162 } else {
36c19d03 163 listAddNodeTail(c->reply,createStringObject(s,len));
834ef78e
PN
164 }
165 }
166}
e2641e09 167
834ef78e 168void addReply(redisClient *c, robj *obj) {
4c2e506a 169 if (_installWriteEvent(c) != REDIS_OK) return;
4c2e506a 170
171 /* This is an important place where we can avoid copy-on-write
172 * when there is a saving child running, avoiding touching the
173 * refcount field of the object if it's not needed.
174 *
175 * If the encoding is RAW and there is room in the static buffer
176 * we'll be able to send the object to the client without
177 * messing with its page. */
178 if (obj->encoding == REDIS_ENCODING_RAW) {
179 if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
180 _addReplyObjectToList(c,obj);
834ef78e 181 } else {
d51ebef5 182 /* FIXME: convert the long into string and use _addReplyToBuffer()
183 * instead of calling getDecodedObject. As this place in the
184 * code is too performance critical. */
834ef78e 185 obj = getDecodedObject(obj);
4c2e506a 186 if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
187 _addReplyObjectToList(c,obj);
188 decrRefCount(obj);
e2641e09 189 }
e2641e09 190}
191
192void addReplySds(redisClient *c, sds s) {
4c2e506a 193 if (_installWriteEvent(c) != REDIS_OK) {
cd76bb65
PN
194 /* The caller expects the sds to be free'd. */
195 sdsfree(s);
196 return;
197 }
36c19d03 198 if (_addReplyToBuffer(c,s,sdslen(s)) == REDIS_OK) {
834ef78e
PN
199 sdsfree(s);
200 } else {
36c19d03
PN
201 /* This method free's the sds when it is no longer needed. */
202 _addReplySdsToList(c,s);
834ef78e 203 }
e2641e09 204}
205
834ef78e 206void addReplyString(redisClient *c, char *s, size_t len) {
4c2e506a 207 if (_installWriteEvent(c) != REDIS_OK) return;
36c19d03
PN
208 if (_addReplyToBuffer(c,s,len) != REDIS_OK)
209 _addReplyStringToList(c,s,len);
834ef78e 210}
e2641e09 211
3ab20376
PN
212void _addReplyError(redisClient *c, char *s, size_t len) {
213 addReplyString(c,"-ERR ",5);
214 addReplyString(c,s,len);
215 addReplyString(c,"\r\n",2);
e2641e09 216}
217
3ab20376
PN
218void addReplyError(redisClient *c, char *err) {
219 _addReplyError(c,err,strlen(err));
220}
e2641e09 221
3ab20376
PN
222void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
223 va_list ap;
224 va_start(ap,fmt);
225 sds s = sdscatvprintf(sdsempty(),fmt,ap);
226 va_end(ap);
227 _addReplyError(c,s,sdslen(s));
228 sdsfree(s);
229}
230
231void _addReplyStatus(redisClient *c, char *s, size_t len) {
232 addReplyString(c,"+",1);
233 addReplyString(c,s,len);
234 addReplyString(c,"\r\n",2);
235}
236
237void addReplyStatus(redisClient *c, char *status) {
238 _addReplyStatus(c,status,strlen(status));
239}
240
241void addReplyStatusFormat(redisClient *c, const char *fmt, ...) {
242 va_list ap;
243 va_start(ap,fmt);
244 sds s = sdscatvprintf(sdsempty(),fmt,ap);
245 va_end(ap);
246 _addReplyStatus(c,s,sdslen(s));
247 sdsfree(s);
248}
249
b301c1fc
PN
250/* Adds an empty object to the reply list that will contain the multi bulk
251 * length, which is not known when this function is called. */
252void *addDeferredMultiBulkLength(redisClient *c) {
4c2e506a 253 /* Note that we install the write event here even if the object is not
254 * ready to be sent, since we are sure that before returning to the
255 * event loop setDeferredMultiBulkLength() will be called. */
256 if (_installWriteEvent(c) != REDIS_OK) return NULL;
36c19d03 257 listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL));
b301c1fc
PN
258 return listLast(c->reply);
259}
260
261/* Populate the length object and try glueing it to the next chunk. */
262void setDeferredMultiBulkLength(redisClient *c, void *node, long length) {
263 listNode *ln = (listNode*)node;
264 robj *len, *next;
265
266 /* Abort when *node is NULL (see addDeferredMultiBulkLength). */
267 if (node == NULL) return;
268
269 len = listNodeValue(ln);
270 len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length);
271 if (ln->next != NULL) {
272 next = listNodeValue(ln->next);
36c19d03 273
49128f0b 274 /* Only glue when the next node is non-NULL (an sds in this case) */
36c19d03 275 if (next->ptr != NULL) {
49128f0b 276 len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr));
b301c1fc
PN
277 listDelNode(c->reply,ln->next);
278 }
e2641e09 279 }
b301c1fc
PN
280}
281
d51ebef5 282/* Add a duble as a bulk reply */
834ef78e
PN
283void addReplyDouble(redisClient *c, double d) {
284 char dbuf[128], sbuf[128];
285 int dlen, slen;
286 dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d);
287 slen = snprintf(sbuf,sizeof(sbuf),"$%d\r\n%s\r\n",dlen,dbuf);
288 addReplyString(c,sbuf,slen);
e2641e09 289}
290
d51ebef5 291/* Add a long long as integer reply or bulk len / multi bulk count.
292 * Basically this is used to output <prefix><long long><crlf>. */
834ef78e 293void _addReplyLongLong(redisClient *c, long long ll, char prefix) {
e2641e09 294 char buf[128];
834ef78e
PN
295 int len;
296 buf[0] = prefix;
e2641e09 297 len = ll2string(buf+1,sizeof(buf)-1,ll);
298 buf[len+1] = '\r';
299 buf[len+2] = '\n';
834ef78e 300 addReplyString(c,buf,len+3);
e2641e09 301}
302
834ef78e
PN
303void addReplyLongLong(redisClient *c, long long ll) {
304 _addReplyLongLong(c,ll,':');
305}
e2641e09 306
0537e7bf
PN
307void addReplyMultiBulkLen(redisClient *c, long length) {
308 _addReplyLongLong(c,length,'*');
e2641e09 309}
310
d51ebef5 311/* Create the length prefix of a bulk reply, example: $2234 */
e2641e09 312void addReplyBulkLen(redisClient *c, robj *obj) {
834ef78e 313 size_t len;
e2641e09 314
315 if (obj->encoding == REDIS_ENCODING_RAW) {
316 len = sdslen(obj->ptr);
317 } else {
318 long n = (long)obj->ptr;
319
320 /* Compute how many bytes will take this integer as a radix 10 string */
321 len = 1;
322 if (n < 0) {
323 len++;
324 n = -n;
325 }
326 while((n = n/10) != 0) {
327 len++;
328 }
329 }
834ef78e 330 _addReplyLongLong(c,len,'$');
e2641e09 331}
332
d51ebef5 333/* Add a Redis Object as a bulk reply */
e2641e09 334void addReplyBulk(redisClient *c, robj *obj) {
335 addReplyBulkLen(c,obj);
336 addReply(c,obj);
337 addReply(c,shared.crlf);
338}
339
d51ebef5 340/* Add a C buffer as bulk reply */
341void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
342 _addReplyLongLong(c,len,'$');
343 addReplyString(c,p,len);
344 addReply(c,shared.crlf);
345}
346
347/* Add a C nul term string as bulk reply */
e2641e09 348void addReplyBulkCString(redisClient *c, char *s) {
349 if (s == NULL) {
350 addReply(c,shared.nullbulk);
351 } else {
d51ebef5 352 addReplyBulkCBuffer(c,s,strlen(s));
e2641e09 353 }
354}
355
d51ebef5 356/* Add a long long as a bulk reply */
357void addReplyBulkLongLong(redisClient *c, long long ll) {
358 char buf[64];
359 int len;
360
361 len = ll2string(buf,64,ll);
362 addReplyBulkCBuffer(c,buf,len);
363}
364
ab17b909 365static void acceptCommonHandler(int fd) {
e2641e09 366 redisClient *c;
ab17b909 367 if ((c = createClient(fd)) == NULL) {
e2641e09 368 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
ab17b909 369 close(fd); /* May be already closed, just ingore errors */
e2641e09 370 return;
371 }
372 /* If maxclient directive is set and this is one client more... close the
373 * connection. Note that we create the client instead to check before
374 * for this condition, since now the socket is already set in nonblocking
375 * mode and we can send an error for free using the Kernel I/O */
376 if (server.maxclients && listLength(server.clients) > server.maxclients) {
377 char *err = "-ERR max number of clients reached\r\n";
378
379 /* That's a best effort error message, don't check write errors */
380 if (write(c->fd,err,strlen(err)) == -1) {
381 /* Nothing to do, Just to avoid the warning... */
382 }
383 freeClient(c);
384 return;
385 }
386 server.stat_numconnections++;
387}
388
ab17b909
PN
389void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
390 int cport, cfd;
391 char cip[128];
392 REDIS_NOTUSED(el);
393 REDIS_NOTUSED(mask);
394 REDIS_NOTUSED(privdata);
395
396 cfd = anetTcpAccept(server.neterr, fd, cip, &cport);
397 if (cfd == AE_ERR) {
398 redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
399 return;
400 }
401 redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
402 acceptCommonHandler(cfd);
403}
404
405void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
406 int cfd;
ab17b909
PN
407 REDIS_NOTUSED(el);
408 REDIS_NOTUSED(mask);
409 REDIS_NOTUSED(privdata);
410
4fe83b55 411 cfd = anetUnixAccept(server.neterr, fd);
ab17b909
PN
412 if (cfd == AE_ERR) {
413 redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
414 return;
415 }
416 redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket);
417 acceptCommonHandler(cfd);
418}
419
420
e2641e09 421static void freeClientArgv(redisClient *c) {
422 int j;
e2641e09 423 for (j = 0; j < c->argc; j++)
424 decrRefCount(c->argv[j]);
e2641e09 425 c->argc = 0;
e2641e09 426}
427
428void freeClient(redisClient *c) {
429 listNode *ln;
430
431 /* Note that if the client we are freeing is blocked into a blocking
432 * call, we have to set querybuf to NULL *before* to call
433 * unblockClientWaitingData() to avoid processInputBuffer() will get
434 * called. Also it is important to remove the file events after
435 * this, because this call adds the READABLE event. */
436 sdsfree(c->querybuf);
437 c->querybuf = NULL;
438 if (c->flags & REDIS_BLOCKED)
439 unblockClientWaitingData(c);
440
441 /* UNWATCH all the keys */
442 unwatchAllKeys(c);
443 listRelease(c->watched_keys);
444 /* Unsubscribe from all the pubsub channels */
445 pubsubUnsubscribeAllChannels(c,0);
446 pubsubUnsubscribeAllPatterns(c,0);
447 dictRelease(c->pubsub_channels);
448 listRelease(c->pubsub_patterns);
449 /* Obvious cleanup */
450 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
451 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
452 listRelease(c->reply);
453 freeClientArgv(c);
454 close(c->fd);
455 /* Remove from the list of clients */
456 ln = listSearchKey(server.clients,c);
457 redisAssert(ln != NULL);
458 listDelNode(server.clients,ln);
1a71fb96 459 /* Remove from the list of clients waiting for swapped keys, or ready
460 * to be restarted, but not yet woken up again. */
461 if (c->flags & REDIS_IO_WAIT) {
697af434 462 redisAssert(server.ds_enabled);
1a71fb96 463 if (listLength(c->io_keys) == 0) {
464 ln = listSearchKey(server.io_ready_clients,c);
465
466 /* When this client is waiting to be woken up (REDIS_IO_WAIT),
467 * it should be present in the list io_ready_clients */
468 redisAssert(ln != NULL);
e2641e09 469 listDelNode(server.io_ready_clients,ln);
1a71fb96 470 } else {
471 while (listLength(c->io_keys)) {
472 ln = listFirst(c->io_keys);
473 dontWaitForSwappedKey(c,ln->value);
474 }
e2641e09 475 }
697af434 476 server.cache_blocked_clients--;
e2641e09 477 }
478 listRelease(c->io_keys);
778b2210 479 /* Master/slave cleanup.
480 * Case 1: we lost the connection with a slave. */
e2641e09 481 if (c->flags & REDIS_SLAVE) {
482 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
483 close(c->repldbfd);
484 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
485 ln = listSearchKey(l,c);
486 redisAssert(ln != NULL);
487 listDelNode(l,ln);
488 }
778b2210 489
490 /* Case 2: we lost the connection with the master. */
e2641e09 491 if (c->flags & REDIS_MASTER) {
492 server.master = NULL;
f4aa600b 493 /* FIXME */
e2641e09 494 server.replstate = REDIS_REPL_CONNECT;
778b2210 495 /* Since we lost the connection with the master, we should also
496 * close the connection with all our slaves if we have any, so
497 * when we'll resync with the master the other slaves will sync again
498 * with us as well. Note that also when the slave is not connected
499 * to the master it will keep refusing connections by other slaves. */
500 while (listLength(server.slaves)) {
501 ln = listFirst(server.slaves);
502 freeClient((redisClient*)ln->value);
503 }
e2641e09 504 }
505 /* Release memory */
506 zfree(c->argv);
e2641e09 507 freeClientMultiState(c);
508 zfree(c);
509}
510
e2641e09 511void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
512 redisClient *c = privdata;
513 int nwritten = 0, totwritten = 0, objlen;
514 robj *o;
515 REDIS_NOTUSED(el);
516 REDIS_NOTUSED(mask);
517
518 /* Use writev() if we have enough buffers to send */
519 if (!server.glueoutputbuf &&
520 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
521 !(c->flags & REDIS_MASTER))
522 {
523 sendReplyToClientWritev(el, fd, privdata, mask);
524 return;
525 }
526
834ef78e
PN
527 while(c->bufpos > 0 || listLength(c->reply)) {
528 if (c->bufpos > 0) {
529 if (c->flags & REDIS_MASTER) {
530 /* Don't reply to a master */
531 nwritten = c->bufpos - c->sentlen;
532 } else {
533 nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
534 if (nwritten <= 0) break;
535 }
536 c->sentlen += nwritten;
537 totwritten += nwritten;
538
539 /* If the buffer was sent, set bufpos to zero to continue with
540 * the remainder of the reply. */
541 if (c->sentlen == c->bufpos) {
542 c->bufpos = 0;
543 c->sentlen = 0;
544 }
545 } else {
546 o = listNodeValue(listFirst(c->reply));
547 objlen = sdslen(o->ptr);
e2641e09 548
834ef78e
PN
549 if (objlen == 0) {
550 listDelNode(c->reply,listFirst(c->reply));
551 continue;
552 }
e2641e09 553
834ef78e
PN
554 if (c->flags & REDIS_MASTER) {
555 /* Don't reply to a master */
556 nwritten = objlen - c->sentlen;
557 } else {
558 nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen);
559 if (nwritten <= 0) break;
560 }
561 c->sentlen += nwritten;
562 totwritten += nwritten;
e2641e09 563
834ef78e
PN
564 /* If we fully sent the object on head go to the next one */
565 if (c->sentlen == objlen) {
566 listDelNode(c->reply,listFirst(c->reply));
567 c->sentlen = 0;
568 }
e2641e09 569 }
570 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
571 * bytes, in a single threaded server it's a good idea to serve
572 * other clients as well, even if a very large request comes from
573 * super fast link that is always able to accept data (in real world
574 * scenario think about 'KEYS *' against the loopback interfae) */
575 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
576 }
577 if (nwritten == -1) {
578 if (errno == EAGAIN) {
579 nwritten = 0;
580 } else {
581 redisLog(REDIS_VERBOSE,
582 "Error writing to client: %s", strerror(errno));
583 freeClient(c);
584 return;
585 }
586 }
587 if (totwritten > 0) c->lastinteraction = time(NULL);
588 if (listLength(c->reply) == 0) {
589 c->sentlen = 0;
590 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
941c9fa2
PN
591
592 /* Close connection after entire reply has been sent. */
cd8788f2 593 if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c);
e2641e09 594 }
595}
596
597void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
598{
599 redisClient *c = privdata;
600 int nwritten = 0, totwritten = 0, objlen, willwrite;
601 robj *o;
602 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
603 int offset, ion = 0;
604 REDIS_NOTUSED(el);
605 REDIS_NOTUSED(mask);
606
607 listNode *node;
608 while (listLength(c->reply)) {
609 offset = c->sentlen;
610 ion = 0;
611 willwrite = 0;
612
613 /* fill-in the iov[] array */
614 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
615 o = listNodeValue(node);
616 objlen = sdslen(o->ptr);
617
618 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
619 break;
620
621 if(ion == REDIS_WRITEV_IOVEC_COUNT)
622 break; /* no more iovecs */
623
624 iov[ion].iov_base = ((char*)o->ptr) + offset;
625 iov[ion].iov_len = objlen - offset;
626 willwrite += objlen - offset;
627 offset = 0; /* just for the first item */
628 ion++;
629 }
630
631 if(willwrite == 0)
632 break;
633
634 /* write all collected blocks at once */
635 if((nwritten = writev(fd, iov, ion)) < 0) {
636 if (errno != EAGAIN) {
637 redisLog(REDIS_VERBOSE,
638 "Error writing to client: %s", strerror(errno));
639 freeClient(c);
640 return;
641 }
642 break;
643 }
644
645 totwritten += nwritten;
646 offset = c->sentlen;
647
648 /* remove written robjs from c->reply */
649 while (nwritten && listLength(c->reply)) {
650 o = listNodeValue(listFirst(c->reply));
651 objlen = sdslen(o->ptr);
652
653 if(nwritten >= objlen - offset) {
654 listDelNode(c->reply, listFirst(c->reply));
655 nwritten -= objlen - offset;
656 c->sentlen = 0;
657 } else {
658 /* partial write */
659 c->sentlen += nwritten;
660 break;
661 }
662 offset = 0;
663 }
664 }
665
666 if (totwritten > 0)
667 c->lastinteraction = time(NULL);
668
669 if (listLength(c->reply) == 0) {
670 c->sentlen = 0;
671 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
672 }
673}
674
675/* resetClient prepare the client to process the next command */
676void resetClient(redisClient *c) {
677 freeClientArgv(c);
cd8788f2
PN
678 c->reqtype = 0;
679 c->multibulklen = 0;
e2641e09 680 c->bulklen = -1;
e2641e09 681}
682
683void closeTimedoutClients(void) {
684 redisClient *c;
685 listNode *ln;
686 time_t now = time(NULL);
687 listIter li;
688
689 listRewind(server.clients,&li);
690 while ((ln = listNext(&li)) != NULL) {
691 c = listNodeValue(ln);
692 if (server.maxidletime &&
693 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
694 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
e452436a 695 !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
e2641e09 696 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
697 listLength(c->pubsub_patterns) == 0 &&
698 (now - c->lastinteraction > server.maxidletime))
699 {
700 redisLog(REDIS_VERBOSE,"Closing idle client");
701 freeClient(c);
702 } else if (c->flags & REDIS_BLOCKED) {
e3c51c4b 703 if (c->bpop.timeout != 0 && c->bpop.timeout < now) {
e2641e09 704 addReply(c,shared.nullmultibulk);
705 unblockClientWaitingData(c);
706 }
707 }
708 }
709}
710
cd8788f2
PN
711int processInlineBuffer(redisClient *c) {
712 char *newline = strstr(c->querybuf,"\r\n");
713 int argc, j;
714 sds *argv;
715 size_t querylen;
716
717 /* Nothing to do without a \r\n */
718 if (newline == NULL)
719 return REDIS_ERR;
720
721 /* Split the input buffer up to the \r\n */
722 querylen = newline-(c->querybuf);
723 argv = sdssplitlen(c->querybuf,querylen," ",1,&argc);
724
725 /* Leave data after the first line of the query in the buffer */
726 c->querybuf = sdsrange(c->querybuf,querylen+2,-1);
727
728 /* Setup argv array on client structure */
729 if (c->argv) zfree(c->argv);
730 c->argv = zmalloc(sizeof(robj*)*argc);
731
732 /* Create redis objects for all arguments. */
733 for (c->argc = 0, j = 0; j < argc; j++) {
734 if (sdslen(argv[j])) {
735 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
736 c->argc++;
737 } else {
738 sdsfree(argv[j]);
739 }
740 }
741 zfree(argv);
742 return REDIS_OK;
743}
744
745/* Helper function. Trims query buffer to make the function that processes
746 * multi bulk requests idempotent. */
747static void setProtocolError(redisClient *c, int pos) {
748 c->flags |= REDIS_CLOSE_AFTER_REPLY;
749 c->querybuf = sdsrange(c->querybuf,pos,-1);
750}
751
752int processMultibulkBuffer(redisClient *c) {
753 char *newline = NULL;
754 char *eptr;
755 int pos = 0, tolerr;
756 long bulklen;
757
758 if (c->multibulklen == 0) {
759 /* The client should have been reset */
760 redisAssert(c->argc == 0);
761
762 /* Multi bulk length cannot be read without a \r\n */
763 newline = strstr(c->querybuf,"\r\n");
764 if (newline == NULL)
765 return REDIS_ERR;
766
767 /* We know for sure there is a whole line since newline != NULL,
768 * so go ahead and find out the multi bulk length. */
769 redisAssert(c->querybuf[0] == '*');
770 c->multibulklen = strtol(c->querybuf+1,&eptr,10);
771 pos = (newline-c->querybuf)+2;
772 if (c->multibulklen <= 0) {
773 c->querybuf = sdsrange(c->querybuf,pos,-1);
774 return REDIS_OK;
b19c33d4
PN
775 } else if (c->multibulklen > 1024*1024) {
776 addReplyError(c,"Protocol error: invalid multibulk length");
777 setProtocolError(c,pos);
778 return REDIS_ERR;
cd8788f2
PN
779 }
780
781 /* Setup argv array on client structure */
782 if (c->argv) zfree(c->argv);
783 c->argv = zmalloc(sizeof(robj*)*c->multibulklen);
784
785 /* Search new newline */
786 newline = strstr(c->querybuf+pos,"\r\n");
787 }
788
789 redisAssert(c->multibulklen > 0);
790 while(c->multibulklen) {
791 /* Read bulk length if unknown */
792 if (c->bulklen == -1) {
793 newline = strstr(c->querybuf+pos,"\r\n");
794 if (newline != NULL) {
795 if (c->querybuf[pos] != '$') {
796 addReplyErrorFormat(c,
797 "Protocol error: expected '$', got '%c'",
798 c->querybuf[pos]);
799 setProtocolError(c,pos);
800 return REDIS_ERR;
e2641e09 801 }
cd8788f2
PN
802
803 bulklen = strtol(c->querybuf+pos+1,&eptr,10);
804 tolerr = (eptr[0] != '\r');
805 if (tolerr || bulklen == LONG_MIN || bulklen == LONG_MAX ||
401c3e21 806 bulklen < 0 || bulklen > 512*1024*1024)
cd8788f2
PN
807 {
808 addReplyError(c,"Protocol error: invalid bulk length");
809 setProtocolError(c,pos);
810 return REDIS_ERR;
811 }
812 pos += eptr-(c->querybuf+pos)+2;
813 c->bulklen = bulklen;
814 } else {
815 /* No newline in current buffer, so wait for more data */
816 break;
e2641e09 817 }
cd8788f2
PN
818 }
819
820 /* Read bulk argument */
821 if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) {
822 /* Not enough data (+2 == trailing \r\n) */
823 break;
824 } else {
825 c->argv[c->argc++] = createStringObject(c->querybuf+pos,c->bulklen);
826 pos += c->bulklen+2;
827 c->bulklen = -1;
828 c->multibulklen--;
829 }
830 }
831
832 /* Trim to pos */
833 c->querybuf = sdsrange(c->querybuf,pos,-1);
834
835 /* We're done when c->multibulk == 0 */
836 if (c->multibulklen == 0) {
837 return REDIS_OK;
838 }
839 return REDIS_ERR;
840}
841
842void processInputBuffer(redisClient *c) {
843 /* Keep processing while there is something in the input buffer */
844 while(sdslen(c->querybuf)) {
4794d88f 845 /* Immediately abort if the client is in the middle of something. */
cd8788f2
PN
846 if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return;
847
5e78edb3
PN
848 /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is
849 * written to the client. Make sure to not let the reply grow after
850 * this flag has been set (i.e. don't process more commands). */
851 if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
cd8788f2
PN
852
853 /* Determine request type when unknown. */
854 if (!c->reqtype) {
855 if (c->querybuf[0] == '*') {
856 c->reqtype = REDIS_REQ_MULTIBULK;
e2641e09 857 } else {
cd8788f2 858 c->reqtype = REDIS_REQ_INLINE;
e2641e09 859 }
e2641e09 860 }
cd8788f2
PN
861
862 if (c->reqtype == REDIS_REQ_INLINE) {
863 if (processInlineBuffer(c) != REDIS_OK) break;
864 } else if (c->reqtype == REDIS_REQ_MULTIBULK) {
865 if (processMultibulkBuffer(c) != REDIS_OK) break;
866 } else {
867 redisPanic("Unknown request type");
e2641e09 868 }
cd8788f2
PN
869
870 /* Multibulk processing could see a <= 0 length. */
9da6caac
PN
871 if (c->argc == 0) {
872 resetClient(c);
873 } else {
874 /* Only reset the client when the command was executed. */
875 if (processCommand(c) == REDIS_OK)
876 resetClient(c);
877 }
e2641e09 878 }
879}
880
881void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
882 redisClient *c = (redisClient*) privdata;
883 char buf[REDIS_IOBUF_LEN];
884 int nread;
885 REDIS_NOTUSED(el);
886 REDIS_NOTUSED(mask);
887
888 nread = read(fd, buf, REDIS_IOBUF_LEN);
889 if (nread == -1) {
890 if (errno == EAGAIN) {
891 nread = 0;
892 } else {
893 redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
894 freeClient(c);
895 return;
896 }
897 } else if (nread == 0) {
898 redisLog(REDIS_VERBOSE, "Client closed connection");
899 freeClient(c);
900 return;
901 }
902 if (nread) {
cd8788f2 903 c->querybuf = sdscatlen(c->querybuf,buf,nread);
e2641e09 904 c->lastinteraction = time(NULL);
905 } else {
906 return;
907 }
908 processInputBuffer(c);
909}