#define REDIS_VM_MAX_NEAR_PAGES 65536
#define REDIS_VM_MAX_RANDOM_JUMP 4096
#define REDIS_VM_MAX_THREADS 32
+#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
/* The following is the number of completed I/O jobs to process when the
* handelr is called. 1 is the minimum, and also the default, as it allows
* to block as little as possible other accessing clients. While Virtual
pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
+ pthread_attr_t io_threads_attr; /* attributes for threads creation */
int io_active_threads; /* Number of running I/O threads */
int vm_max_threads; /* Max number of I/O threads running at the same time */
/* Our main thread is blocked on the event loop, locking for sockets ready
static void queueIOJob(iojob *j);
static int vmWriteObjectOnSwap(robj *o, off_t page);
static robj *vmReadObjectFromSwap(off_t page, int type);
+static void waitZeroActiveThreads(void);
static void authCommand(redisClient *c);
static void pingCommand(redisClient *c);
redisClient *c;
listNode *ln;
time_t now = time(NULL);
+ listIter li;
- listRewind(server.clients);
- while ((ln = listYield(server.clients)) != NULL) {
+ listRewind(server.clients,&li);
+ while ((ln = listNext(&li)) != NULL) {
c = listNodeValue(ln);
if (server.maxidletime &&
!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
while (server.vm_enabled && zmalloc_used_memory() >
server.vm_max_memory)
{
+ int retval;
+
if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
- if (vmSwapOneObjectThreaded() == REDIS_ERR) {
- if ((loops % 30) == 0 && zmalloc_used_memory() >
- (server.vm_max_memory+server.vm_max_memory/10)) {
- redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
- }
+ retval = (server.vm_max_threads == 0) ?
+ vmSwapOneObjectBlocking() :
+ vmSwapOneObjectThreaded();
+ if (retval == REDIS_ERR && (loops % 30) == 0 &&
+ zmalloc_used_memory() >
+ (server.vm_max_memory+server.vm_max_memory/10))
+ {
+ redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
}
- /* Note that we freed just one object, because anyway when
- * the I/O thread in charge to swap this object out will
- * do its work, the handler of completed jobs will try to swap
- * more objects if we are out of memory. */
- break;
+ /* Note that when using threaded I/O we free just one object,
+ * because when the I/O thread in charge of swapping this
+ * object out finishes, the handler of completed jobs
+ * will try to swap more objects if we are still out of memory. */
+ if (retval == REDIS_ERR || server.vm_max_threads > 0) break;
}
}
int copylen = 0;
char buf[GLUEREPLY_UP_TO];
listNode *ln;
+ listIter li;
robj *o;
- listRewind(c->reply);
- while((ln = listYield(c->reply))) {
+ listRewind(c->reply,&li);
+ while((ln = listNext(&li))) {
int objlen;
o = ln->value;
static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
listNode *ln;
+ listIter li;
int outc = 0, j;
robj **outv;
/* (args*2)+1 is enough room for args, spaces, newlines */
* be sure to free objects if there is no slave in a replication state
* able to be feed with commands */
for (j = 0; j < outc; j++) incrRefCount(outv[j]);
- listRewind(slaves);
- while((ln = listYield(slaves))) {
+ listRewind(slaves,&li);
+ while((ln = listNext(&li))) {
redisClient *slave = ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start */
} else if (o->type == REDIS_LIST) {
/* Save a list value */
list *list = o->ptr;
+ listIter li;
listNode *ln;
- listRewind(list);
if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
- while((ln = listYield(list))) {
+ listRewind(list,&li);
+ while((ln = listNext(&li))) {
robj *eleobj = listNodeValue(ln);
if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
pid_t childpid;
if (server.bgsavechildpid != -1) return REDIS_ERR;
+ if (server.vm_enabled) waitZeroActiveThreads();
if ((childpid = fork()) == 0) {
/* Child */
close(server.fd);
if (sortval->type == REDIS_LIST) {
list *list = sortval->ptr;
listNode *ln;
+ listIter li;
- listRewind(list);
- while((ln = listYield(list))) {
+ listRewind(list,&li);
+ while((ln = listNext(&li))) {
robj *ele = ln->value;
vector[j].obj = ele;
vector[j].u.score = 0;
addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
for (j = start; j <= end; j++) {
listNode *ln;
+ listIter li;
+
if (!getop) {
addReplyBulkLen(c,vector[j].obj);
addReply(c,vector[j].obj);
addReply(c,shared.crlf);
}
- listRewind(operations);
- while((ln = listYield(operations))) {
+ listRewind(operations,&li);
+ while((ln = listNext(&li))) {
redisSortOperation *sop = ln->value;
robj *val = lookupKeyByPattern(c->db,sop->pattern,
vector[j].obj);
/* STORE option specified, set the sorting result as a List object */
for (j = start; j <= end; j++) {
listNode *ln;
+ listIter li;
+
if (!getop) {
listAddNodeTail(listPtr,vector[j].obj);
incrRefCount(vector[j].obj);
}
- listRewind(operations);
- while((ln = listYield(operations))) {
+ listRewind(operations,&li);
+ while((ln = listNext(&li))) {
redisSortOperation *sop = ln->value;
robj *val = lookupKeyByPattern(c->db,sop->pattern,
vector[j].obj);
* registering differences since the server forked to save */
redisClient *slave;
listNode *ln;
+ listIter li;
- listRewind(server.slaves);
- while((ln = listYield(server.slaves))) {
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
}
static void updateSlavesWaitingBgsave(int bgsaveerr) {
listNode *ln;
int startbgsave = 0;
+ listIter li;
- listRewind(server.slaves);
- while((ln = listYield(server.slaves))) {
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
redisClient *slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
}
if (startbgsave) {
if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
- listRewind(server.slaves);
+ listIter li;
+
+ listRewind(server.slaves,&li);
redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
- while((ln = listYield(server.slaves))) {
+ while((ln = listNext(&li))) {
redisClient *slave = ln->value;
if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
/* Emit the RPUSHes needed to rebuild the list */
list *list = o->ptr;
listNode *ln;
+ listIter li;
- listRewind(list);
- while((ln = listYield(list))) {
+ listRewind(list,&li);
+ while((ln = listNext(&li))) {
char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
robj *eleobj = listNodeValue(ln);
pid_t childpid;
if (server.bgrewritechildpid != -1) return REDIS_ERR;
+ if (server.vm_enabled) waitZeroActiveThreads();
if ((childpid = fork()) == 0) {
/* Child */
char tmpfile[256];
static void vmInit(void) {
off_t totsize;
int pipefds[2];
+ size_t stacksize;
server.vm_fp = fopen("/tmp/redisvm","w+b");
if (server.vm_fp == NULL) {
server.io_ready_pipe_read = pipefds[0];
server.io_ready_pipe_write = pipefds[1];
redisAssert(anetNonBlock(NULL,server.io_ready_pipe_read) != ANET_ERR);
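+ /* LZF requires a lot of stack: take the stack size reported for the
+ * freshly initialized attributes and double it until it is at least
+ * REDIS_THREAD_STACK_SIZE.
+ * NOTE(review): if pthread_attr_getstacksize() ever reports 0, the
+ * doubling loop below cannot terminate -- confirm for target platforms. */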
+ pthread_attr_init(&server.io_threads_attr);
+ pthread_attr_getstacksize(&server.io_threads_attr, &stacksize);
+ while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
+ pthread_attr_setstacksize(&server.io_threads_attr, stacksize);
/* Listen for events in the threaded I/O pipe */
if (aeCreateFileEvent(server.el, server.io_ready_pipe_read, AE_READABLE,
vmThreadedIOCompletedJob, NULL) == AE_ERR)
* note: I implemented this function just after watching an episode of
* Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
*/
-static int vmFindContiguousPages(off_t *first, int n) {
+static int vmFindContiguousPages(off_t *first, off_t n) {
off_t base, offset = 0, since_jump = 0, numfree = 0;
if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
/* Ooops... no space! */
freeIOJob(j);
} else {
+ /* Note that we need to mark these pages as used now;
+ * if the job is canceled, we'll mark them as free
+ * again. */
+ vmMarkPagesUsed(j->page,j->pages);
j->type = REDIS_IOJOB_DO_SWAP;
lockThreadedIO();
queueIOJob(j);
key->vtype = j->val->type;
decrRefCount(val); /* Deallocate the object from memory. */
dictGetEntryVal(de) = NULL;
- vmMarkPagesUsed(j->page,j->pages);
redisLog(REDIS_DEBUG,
"VM: object %s swapped out at %lld (%lld pages) (threaded)",
(unsigned char*) key->ptr,
/* Search for a matching key in one of the queues */
for (i = 0; i < 3; i++) {
listNode *ln;
+ listIter li;
- listRewind(lists[i]);
- while ((ln = listYield(lists[i])) != NULL) {
+ listRewind(lists[i],&li);
+ while ((ln = listNext(&li)) != NULL) {
iojob *job = ln->value;
if (job->canceled) continue; /* Skip this, already canceled. */
if (compareStringObjects(job->key,o) == 0) {
redisLog(REDIS_DEBUG,"*** CANCELED %p (%s)\n",
(void*)job, (char*)o->ptr);
+ /* Mark the pages as free since the swap didn't happen
+ * or happened but is now discarded. */
+ if (job->type == REDIS_IOJOB_DO_SWAP)
+ vmMarkPagesFree(job->page,job->pages);
+ /* Cancel the job. It depends on the list the job is
+ * living in. */
switch(i) {
case 0: /* io_newjobs */
/* If the job was yet not processed the best thing to do
job->canceled = 1;
break;
}
+ /* Finally we have to adjust the storage type of the object
+ * in order to "UNDO" the operation. */
if (o->storage == REDIS_VM_LOADING)
o->storage = REDIS_VM_SWAPPED;
else if (o->storage == REDIS_VM_SWAPPING)
lockThreadedIO();
if (listLength(server.io_newjobs) == 0) {
/* No new jobs in queue, exit. */
- redisLog(REDIS_DEBUG,"Thread %lld exiting, nothing to do\n",
+ redisLog(REDIS_DEBUG,"Thread %lld exiting, nothing to do",
(long long) pthread_self());
server.io_active_threads--;
unlockThreadedIO();
listAddNodeTail(server.io_processing,j);
ln = listLast(server.io_processing); /* We use ln later to remove it */
unlockThreadedIO();
- redisLog(REDIS_DEBUG,"Thread %lld got a new job (type %d): %p about key '%s'\n",
+ redisLog(REDIS_DEBUG,"Thread %lld got a new job (type %d): %p about key '%s'",
(long long) pthread_self(), j->type, (void*)j, (char*)j->key->ptr);
/* Process the Job */
}
/* Done: insert the job into the processed queue */
- redisLog(REDIS_DEBUG,"Thread %lld completed the job: %p (key %s)\n",
+ redisLog(REDIS_DEBUG,"Thread %lld completed the job: %p (key %s)",
(long long) pthread_self(), (void*)j, (char*)j->key->ptr);
lockThreadedIO();
listDelNode(server.io_processing,ln);
static void spawnIOThread(void) {
pthread_t thread;
- pthread_create(&thread,NULL,IOThreadEntryPoint,NULL);
+ pthread_create(&thread,&server.io_threads_attr,IOThreadEntryPoint,NULL); /* Start
+ * one I/O thread running IOThreadEntryPoint, created with
+ * server.io_threads_attr (the attributes vmInit() sets the stack size
+ * on), then count it as active.
+ * NOTE(review): the return value of pthread_create() is not checked,
+ * yet io_active_threads is incremented unconditionally right after;
+ * if creation can fail here the counter would stay above zero and
+ * waitZeroActiveThreads() would keep polling -- confirm and handle. */
server.io_active_threads++;
}
+/* We need to wait for the last thread to exit before we are able to
+ * fork() in order to BGSAVE or BGREWRITEAOF.
+ *
+ * The function polls server.io_active_threads while holding the
+ * threaded I/O lock and returns as soon as it reads zero. Otherwise it
+ * releases the lock and sleeps 10 milliseconds before checking again,
+ * so the caller stays blocked for as long as any I/O thread is still
+ * counted as active. Takes no arguments and returns nothing. */
+static void waitZeroActiveThreads(void) {
+ while(1) {
+ lockThreadedIO();
+ if (server.io_active_threads == 0) {
+ unlockThreadedIO();
+ return;
+ }
+ unlockThreadedIO();
+ usleep(10000); /* 10 milliseconds */
+ }
+}
+
/* This function must be called while with threaded IO locked */
static void queueIOJob(iojob *j) {
redisLog(REDIS_DEBUG,"Queued IO Job %p type %d about key '%s'\n",