From 0cf10e8e867274397f2b7201e51b00996263a143 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 2 May 2012 22:41:50 +0200 Subject: [PATCH 1/1] syncio.c read / write functions reworked for correctness and performance. The new implementation start reading / writing before blocking with aeWait(), likely the descriptor can accept writes or has buffered data inside and we can go faster, otherwise we get an error and wait. This change has effects on speed but also on correctness: on socket errors when we perform non blocking connect(2) write is performed ASAP and the error is returned ASAP before waiting. So the practical effect is that now a Redis slave is more available if it can not connect to the master, previously the slave continued to block on syncWrite() trying to send SYNC, and serving commands very slowly. --- src/syncio.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/syncio.c b/src/syncio.c index b0c2969e..0c202c9e 100644 --- a/src/syncio.c +++ b/src/syncio.c @@ -42,10 +42,10 @@ #define REDIS_SYNCIO_RESOLUTION 10 /* Resolution in milliseconds */ -/* Write the specified payload to 'fd'. If writing the whole payload will be done - * within 'timeout' milliseconds the operation succeeds and 'size' is returned. - * Otherwise the operation fails, -1 is returned, and an unspecified partial write - * could be performed against the file descriptor. */ +/* Write the specified payload to 'fd'. If writing the whole payload will be + * done within 'timeout' milliseconds the operation succeeds and 'size' is + * returned. Otherwise the operation fails, -1 is returned, and an unspecified + * partial write could be performed against the file descriptor. */ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) { ssize_t nwritten, ret = size; long long start = mstime(); @@ -56,13 +56,19 @@ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) { remaining : REDIS_SYNCIO_RESOLUTION; long long elapsed; - if (aeWait(fd,AE_WRITABLE,wait) & AE_WRITABLE) { - nwritten = write(fd,ptr,size); - if (nwritten == -1) return -1; + /* Optimistically try to write before checking if the file descriptor + * is actually writable. At worst we get EAGAIN. */ + nwritten = write(fd,ptr,size); + if (nwritten == -1) { + if (errno != EAGAIN) return -1; + } else { ptr += nwritten; size -= nwritten; - if (size == 0) return ret; } + if (size == 0) return ret; + + /* Wait */ + aeWait(fd,AE_WRITABLE,wait); elapsed = mstime() - start; if (elapsed >= timeout) { errno = ETIMEDOUT; @@ -72,8 +78,8 @@ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) { } } -/* Read the specified amount of bytes from 'fd'. If all the bytes are read within - * 'timeout' milliseconds the operation succeed and 'size' is returned. +/* Read the specified amount of bytes from 'fd'. If all the bytes are read + * within 'timeout' milliseconds the operation succeed and 'size' is returned. * Otherwise the operation fails, -1 is returned, and an unspecified amount of * data could be read from the file descriptor. */ ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) { @@ -81,19 +87,27 @@ ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) { long long start = mstime(); long long remaining = timeout; + if (size == 0) return 0; while(1) { long long wait = (remaining > REDIS_SYNCIO_RESOLUTION) ? remaining : REDIS_SYNCIO_RESOLUTION; long long elapsed; - if (aeWait(fd,AE_READABLE,wait) & AE_READABLE) { - nread = read(fd,ptr,size); - if (nread <= 0) return -1; + /* Optimistically try to read before checking if the file descriptor + * is actually readable. At worst we get EAGAIN. */ + nread = read(fd,ptr,size); + if (nread == 0) return -1; /* short read. */ + if (nread == -1) { + if (errno != EAGAIN) return -1; + } else { ptr += nread; size -= nread; totread += nread; - if (size == 0) return totread; } + if (size == 0) return totread; + + /* Wait */ + aeWait(fd,AE_READABLE,wait); elapsed = mstime() - start; if (elapsed >= timeout) { errno = ETIMEDOUT; -- 2.45.2