############################### ADVANCED CONFIG ###############################
-# Glue small output buffers together in order to send small replies in a
-# single TCP packet. Uses a bit more CPU but most of the times it is a win
-# in terms of number of queries per second. Use 'yes' if unsure.
-glueoutputbuf yes
-
# Hashes are encoded in a special way (much more memory efficient) when they
# have at max a given numer of elements, and the biggest element does not
# exceed a given threshold. You can configure this limits with the following
* Note that fcntl(2) for F_GETFL and F_SETFL can't be
* interrupted by a signal. */
if ((flags = fcntl(fd, F_GETFL)) == -1) {
- anetSetError(err, "fcntl(F_GETFL): %s\n", strerror(errno));
+ anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
return ANET_ERR;
}
if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
- anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s\n", strerror(errno));
+ anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
int yes = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)) == -1)
{
- anetSetError(err, "setsockopt TCP_NODELAY: %s\n", strerror(errno));
+ anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
{
if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffsize, sizeof(buffsize)) == -1)
{
- anetSetError(err, "setsockopt SO_SNDBUF: %s\n", strerror(errno));
+ anetSetError(err, "setsockopt SO_SNDBUF: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
{
int yes = 1;
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(yes)) == -1) {
- anetSetError(err, "setsockopt SO_KEEPALIVE: %s\n", strerror(errno));
+ anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
he = gethostbyname(host);
if (he == NULL) {
- anetSetError(err, "can't resolve: %s\n", host);
+ anetSetError(err, "can't resolve: %s", host);
return ANET_ERR;
}
memcpy(&sa.sin_addr, he->h_addr, sizeof(struct in_addr));
static int anetCreateSocket(char *err, int domain) {
int s, on = 1;
if ((s = socket(domain, SOCK_STREAM, 0)) == -1) {
- anetSetError(err, "creating socket: %s\n", strerror(errno));
+ anetSetError(err, "creating socket: %s", strerror(errno));
return ANET_ERR;
}
/* Make sure connection-intensive things like the redis benckmark
* will be able to close/open sockets a zillion of times */
if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) {
- anetSetError(err, "setsockopt SO_REUSEADDR: %s\n", strerror(errno));
+ anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno));
return ANET_ERR;
}
return s;
he = gethostbyname(addr);
if (he == NULL) {
- anetSetError(err, "can't resolve: %s\n", addr);
+ anetSetError(err, "can't resolve: %s", addr);
close(s);
return ANET_ERR;
}
flags & ANET_CONNECT_NONBLOCK)
return s;
- anetSetError(err, "connect: %s\n", strerror(errno));
+ anetSetError(err, "connect: %s", strerror(errno));
close(s);
return ANET_ERR;
}
flags & ANET_CONNECT_NONBLOCK)
return s;
- anetSetError(err, "connect: %s\n", strerror(errno));
+ anetSetError(err, "connect: %s", strerror(errno));
close(s);
return ANET_ERR;
}
static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len) {
if (bind(s,sa,len) == -1) {
- anetSetError(err, "bind: %s\n", strerror(errno));
+ anetSetError(err, "bind: %s", strerror(errno));
close(s);
return ANET_ERR;
}
if (listen(s, 511) == -1) { /* the magic 511 constant is from nginx */
- anetSetError(err, "listen: %s\n", strerror(errno));
+ anetSetError(err, "listen: %s", strerror(errno));
close(s);
return ANET_ERR;
}
sa.sin_port = htons(port);
sa.sin_addr.s_addr = htonl(INADDR_ANY);
if (bindaddr && inet_aton(bindaddr, &sa.sin_addr) == 0) {
- anetSetError(err, "Invalid bind address\n");
+ anetSetError(err, "invalid bind address");
close(s);
return ANET_ERR;
}
if (errno == EINTR)
continue;
else {
- anetSetError(err, "accept: %s\n", strerror(errno));
+ anetSetError(err, "accept: %s", strerror(errno));
return ANET_ERR;
}
}
if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
- } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
- if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
+ } else if (!strcasecmp(argv[0],"glueoutputbuf")) {
+ redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]);
} else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
}
}
+
+/* Write an .rdb dump for a diskstore-enabled server.
+ *
+ * The dump is first written to a temporary file named "temp-<pid>.rdb" and
+ * then rename(2)d onto 'filename', so the destination is replaced only when
+ * the temporary file was written and synced without errors.
+ *
+ * On success server.dirty is reset, server.lastsave is updated and REDIS_OK
+ * is returned. On any open, write, sync, close or rename failure the
+ * temporary file is removed (when it was created) and REDIS_ERR is returned. */
+int dsRdbSave(char *filename) {
+    char tmpfile[256];
+    char buf[1024];
+    FILE *fp;
+    int j, i, saved_errno;
+
+    snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) getpid());
+    fp = fopen(tmpfile,"w");
+    if (!fp) {
+        redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
+        return REDIS_ERR;
+    }
+    if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
+
+    /* Scan all diskstore dirs looking for keys */
+    for (j = 0; j < 256; j++) {
+        for (i = 0; i < 256; i++) {
+            /* NOTE(review): the directory path is built here but nothing
+             * reads it yet; only the SELECT DB opcode is emitted for every
+             * (j,i) pair. Confirm this is still work in progress. */
+            snprintf(buf,sizeof(buf),"%s/%02x/%02x",server.ds_path,j,i);
+
+            /* Write the SELECT DB opcode */
+            if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
+            if (rdbSaveLen(fp,j) == -1) goto werr;
+        }
+    }
+
+    /* Make sure data will not remain on the OS's output buffers */
+    if (fflush(fp) == EOF) goto werr;
+    if (fsync(fileno(fp)) == -1) goto werr;
+    if (fclose(fp) == EOF) {
+        /* The stream is no longer usable even when fclose() fails, so make
+         * sure the error path does not close it a second time. */
+        fp = NULL;
+        goto werr;
+    }
+
+    /* Use RENAME to make sure the DB file is changed atomically only
+     * if the generated DB file is ok. */
+    if (rename(tmpfile,filename) == -1) {
+        redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
+        unlink(tmpfile);
+        return REDIS_ERR;
+    }
+    redisLog(REDIS_NOTICE,"DB saved on disk");
+    server.dirty = 0;
+    server.lastsave = time(NULL);
+    return REDIS_OK;
+
+werr:
+    /* fclose() and unlink() may overwrite errno: save it for the log line. */
+    saved_errno = errno;
+    if (fp) fclose(fp);
+    unlink(tmpfile);
+    redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(saved_errno));
+    return REDIS_ERR;
+}
#if 0
/* If there are new jobs we need to signal the thread to
- * process the next one. */
+ * process the next one. FIXME: drop this if useless. */
redisLog(REDIS_DEBUG,"waitEmptyIOJobsQueue: new %d, processing %d",
listLength(server.io_newjobs),
listLength(server.io_processing));
spawnIOThread();
}
-void dsCreateIOJob(int type, redisDb *db, robj *key, robj *val) {
+/* Drain every scheduled IO operation and every thread IO job, so that the
+ * diskstore eventually reflects a point-in-time snapshot of the dataset.
+ *
+ * Needed before a BGSAVE when diskstore is enabled. */
+void cacheForcePointInTime(void) {
+    redisLog(REDIS_NOTICE,"Diskstore: synching on disk to reach point-in-time state.");
+
+    /* Keep pushing scheduled operations to the IO jobs until the cache IO
+     * queue is empty. */
+    for (;;) {
+        if (listLength(server.cache_io_queue) == 0) break;
+        cacheScheduleIOPushJobs(REDIS_IO_ASAP);
+        processActiveIOJobs(1);
+    }
+
+    /* Then wait for the jobs still in flight and handle what is pending. */
+    waitEmptyIOJobsQueue();
+    processAllPendingIOJobs();
+}
+
+void cacheCreateIOJob(int type, redisDb *db, robj *key, robj *val) {
iojob *j;
j = zmalloc(sizeof(*j));
op->type == REDIS_IO_LOAD ? "load" : "save", op->key->ptr);
if (op->type == REDIS_IO_LOAD) {
- dsCreateIOJob(REDIS_IOJOB_LOAD,op->db,op->key,NULL);
+ cacheCreateIOJob(REDIS_IOJOB_LOAD,op->db,op->key,NULL);
} else {
/* Lookup the key, in order to put the current value in the IO
* Job. Otherwise if the key does not exists we schedule a disk
* the key on disk. */
val = NULL;
}
- dsCreateIOJob(REDIS_IOJOB_SAVE,op->db,op->key,val);
+ cacheCreateIOJob(REDIS_IOJOB_SAVE,op->db,op->key,val);
}
/* Mark the operation as in progress. */
cacheScheduleIODelFlag(op->db,op->key,op->type);
REDIS_NOTUSED(el);
REDIS_NOTUSED(mask);
- /* Use writev() if we have enough buffers to send */
- if (!server.glueoutputbuf &&
- listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
- !(c->flags & REDIS_MASTER))
- {
- sendReplyToClientWritev(el, fd, privdata, mask);
- return;
- }
-
while(c->bufpos > 0 || listLength(c->reply)) {
if (c->bufpos > 0) {
if (c->flags & REDIS_MASTER) {
}
}
-void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
-{
- redisClient *c = privdata;
- int nwritten = 0, totwritten = 0, objlen, willwrite;
- robj *o;
- struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
- int offset, ion = 0;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
-
- listNode *node;
- while (listLength(c->reply)) {
- offset = c->sentlen;
- ion = 0;
- willwrite = 0;
-
- /* fill-in the iov[] array */
- for(node = listFirst(c->reply); node; node = listNextNode(node)) {
- o = listNodeValue(node);
- objlen = sdslen(o->ptr);
-
- if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
- break;
-
- if(ion == REDIS_WRITEV_IOVEC_COUNT)
- break; /* no more iovecs */
-
- iov[ion].iov_base = ((char*)o->ptr) + offset;
- iov[ion].iov_len = objlen - offset;
- willwrite += objlen - offset;
- offset = 0; /* just for the first item */
- ion++;
- }
-
- if(willwrite == 0)
- break;
-
- /* write all collected blocks at once */
- if((nwritten = writev(fd, iov, ion)) < 0) {
- if (errno != EAGAIN) {
- redisLog(REDIS_VERBOSE,
- "Error writing to client: %s", strerror(errno));
- freeClient(c);
- return;
- }
- break;
- }
-
- totwritten += nwritten;
- offset = c->sentlen;
-
- /* remove written robjs from c->reply */
- while (nwritten && listLength(c->reply)) {
- o = listNodeValue(listFirst(c->reply));
- objlen = sdslen(o->ptr);
-
- if(nwritten >= objlen - offset) {
- listDelNode(c->reply, listFirst(c->reply));
- nwritten -= objlen - offset;
- c->sentlen = 0;
- } else {
- /* partial write */
- c->sentlen += nwritten;
- break;
- }
- offset = 0;
- }
- }
-
- if (totwritten > 0)
- c->lastinteraction = time(NULL);
-
- if (listLength(c->reply) == 0) {
- c->sentlen = 0;
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- }
-}
-
/* resetClient prepare the client to process the next command */
void resetClient(redisClient *c) {
freeClientArgv(c);
int j;
time_t now = time(NULL);
- /* FIXME: implement .rdb save for disk store properly */
- redisAssert(server.ds_enabled == 0);
+ if (server.ds_enabled) {
+ cacheForcePointInTime();
+ return dsRdbSave(filename);
+ }
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w");
pid_t childpid;
if (server.bgsavechildpid != -1) return REDIS_ERR;
- redisAssert(server.ds_enabled == 0);
+
server.dirty_before_bgsave = server.dirty;
+
if ((childpid = fork()) == 0) {
+ int retval;
+
/* Child */
if (server.ipfd > 0) close(server.ipfd);
if (server.sofd > 0) close(server.sofd);
- if (rdbSave(filename) == REDIS_OK) {
- _exit(0);
+ /* Keep the result of the save: it decides the child exit status below. */
+ if (server.ds_enabled) {
+ cacheForcePointInTime();
+ retval = dsRdbSave(filename);
} else {
- _exit(1);
+ retval = rdbSave(filename);
}
+ _exit((retval == REDIS_OK) ? 0 : 1);
} else {
/* Parent */
if (childpid == -1) {
server.syslog_enabled = 0;
server.syslog_ident = zstrdup("redis");
server.syslog_facility = LOG_LOCAL0;
- server.glueoutputbuf = 1;
server.daemonize = 0;
server.appendonly = 0;
server.appendfsync = APPENDFSYNC_EVERYSEC;
#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */
#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
-/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
-#define REDIS_WRITEV_THRESHOLD 3
-/* Max number of iovecs used for each writev call */
-#define REDIS_WRITEV_IOVEC_COUNT 256
-
/* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
long long stat_keyspace_misses; /* number of failed lookups of keys */
/* Configuration */
int verbosity;
- int glueoutputbuf;
int maxidletime;
int dbnum;
int daemonize;
void freeClient(redisClient *c);
void resetClient(redisClient *c);
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
-void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
void *addDeferredMultiBulkLength(redisClient *c);
void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
int cacheKeyMayExist(redisDb *db, robj *key);
void cacheSetKeyMayExist(redisDb *db, robj *key);
void cacheSetKeyDoesNotExist(redisDb *db, robj *key);
+void cacheForcePointInTime(void);
/* Set data type */
robj *setTypeCreate(robj *value);
--- /dev/null
+# This script is from http://poormansprofiler.org/
+
+#!/bin/bash
+nsamples=1
+sleeptime=0
+pid=$(pidof redis-server)
+
+for x in $(seq 1 $nsamples)
+ do
+ gdb -ex "set pagination 0" -ex "thread apply all bt" -batch -p $pid
+ sleep $sleeptime
+ done | \
+awk '
+ BEGIN { s = ""; }
+ /Thread/ { print s; s = ""; }
+ /^\#/ { if (s != "" ) { s = s "," $4} else { s = $4 } }
+ END { print s }' | \
+sort | uniq -c | sort -r -n -k 1,1