return REDIS_OK;
}
+/*-----------------------------------------------------------------------------
+ * Hooks for key space changes.
+ *
+ * Every time a key in the database is modified the function
+ * signalModifiedKey() is called.
+ *
+ * Every time a DB is flushed the function signalFlushDb() is called.
+ *----------------------------------------------------------------------------*/
+
+void signalModifiedKey(redisDb *db, robj *key) {
+ touchWatchedKey(db,key);
+ if (server.ds_enabled)
+ cacheScheduleForFlush(db,key);
+}
+
+void signalFlushedDb(int dbid) {
+ touchWatchedKeysOnFlush(dbid);
+ if (server.ds_enabled)
+ dsFlushDb(dbid);
+}
+
/*-----------------------------------------------------------------------------
* Type agnostic commands operating on the key space
*----------------------------------------------------------------------------*/
void flushdbCommand(redisClient *c) {
server.dirty += dictSize(c->db->dict);
- touchWatchedKeysOnFlush(c->db->id);
+ signalFlushedDb(c->db->id);
dictEmpty(c->db->dict);
dictEmpty(c->db->expires);
addReply(c,shared.ok);
}
void flushallCommand(redisClient *c) {
- touchWatchedKeysOnFlush(-1);
+ signalFlushedDb(-1);
server.dirty += emptyDb();
addReply(c,shared.ok);
if (server.bgsavechildpid != -1) {
for (j = 1; j < c->argc; j++) {
if (dbDelete(c->db,c->argv[j])) {
- touchWatchedKey(c->db,c->argv[j]);
+ signalModifiedKey(c->db,c->argv[j]);
server.dirty++;
deleted++;
}
dbReplace(c->db,c->argv[2],o);
}
dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[2]);
+ signalModifiedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[2]);
server.dirty++;
addReply(c,nx ? shared.cone : shared.ok);
}
if (seconds <= 0) {
if (dbDelete(c->db,key)) server.dirty++;
addReply(c, shared.cone);
- touchWatchedKey(c->db,key);
+ signalModifiedKey(c->db,key);
return;
} else {
time_t when = time(NULL)+seconds;
setExpire(c->db,key,when);
addReply(c,shared.cone);
- touchWatchedKey(c->db,key);
+ signalModifiedKey(c->db,key);
server.dirty++;
return;
}
int dsExists(redisDb *db, robj *key) {
}
+
+int dsFlushDb(int dbid) {
+}
/* Ok, this object can be encoded...
*
* Can I use a shared object? Only if the object is inside a given
- * range and if this is the main thread, since when VM is enabled we
- * have the constraint that I/O thread should only handle non-shared
- * objects, in order to avoid race conditions (we don't have per-object
- * locking).
+ * range and if the back end in use is in-memory. For disk store every
+ * object in memory used as value should be independent.
*
* Note that we also avoid using shared integers when maxmemory is used
- * because very object needs to have a private LRU field for the LRU
+ * because every object needs to have a private LRU field for the LRU
* algorithm to work well. */
- if (server.maxmemory == 0 && value >= 0 && value < REDIS_SHARED_INTEGERS &&
- pthread_equal(pthread_self(),server.mainthread)) {
+ if (server.ds_enabled == 0 &&
+ server.maxmemory == 0 && value >= 0 && value < REDIS_SHARED_INTEGERS &&
+ pthread_equal(pthread_self(),server.mainthread))
+ {
decrRefCount(o);
incrRefCount(shared.integers[value]);
return shared.integers[value];
FILE *fp;
uint32_t dbid;
int type, retval, rdbver;
- int swap_all_values = 0;
redisDb *db = server.db+0;
char buf[1024];
time_t expiretime, now = time(NULL);
/* Set the expire time if needed */
if (expiretime != -1) setExpire(db,key,expiretime);
- /* Handle swapping while loading big datasets when VM is on */
-
- /* If we detecter we are hopeless about fitting something in memory
- * we just swap every new key on disk. Directly...
- * Note that's important to check for this condition before resorting
- * to random sampling, otherwise we may try to swap already
- * swapped keys. */
- if (swap_all_values) {
- dictEntry *de = dictFind(db->dict,key->ptr);
-
- /* de may be NULL since the key already expired */
- if (de) {
- vmpointer *vp;
- val = dictGetEntryVal(de);
-
- if (val->refcount == 1 &&
- (vp = vmSwapObjectBlocking(val)) != NULL)
- dictGetEntryVal(de) = vp;
- }
- decrRefCount(key);
- continue;
- }
decrRefCount(key);
}
fclose(fp);
server.slaves = listCreate();
server.monitors = listCreate();
server.unblocked_clients = listCreate();
+ server.cache_flush_queue = listCreate();
+ server.cache_flush_delay = 0;
+
createSharedObjects();
server.el = aeCreateEventLoop();
server.db = zmalloc(sizeof(redisDb)*server.dbnum);
/* Blocked clients */
unsigned int bpop_blocked_clients;
unsigned int cache_blocked_clients;
- list *unblocked_clients;
+ list *unblocked_clients; /* list of clients to unblock before next loop */
+ list *cache_flush_queue; /* keys to flush on disk */
+ int cache_flush_delay; /* seconds to wait before flushing keys */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
* field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
} iojob;
+/* When diskstore is enabled and a flush operation is requested we push
+ * one of this structures into server.cache_flush_queue. */
+typedef struct dirtykey {
+ redisDb *db;
+ robj *key;
+ time_t ctime; /* This is the creation time of the entry. */
+} dirtykey;
+
/* Structure to hold list iteration abstraction. */
typedef struct {
robj *subject;
robj *dsGet(redisDb *db, robj *key);
int dsDel(redisDb *db, robj *key);
int dsExists(redisDb *db, robj *key);
+int dsFlushDb(int dbid);
/* Disk Store Cache */
-void vmInit(void);
-void vmMarkPagesFree(off_t page, off_t count);
-robj *vmLoadObject(robj *o);
-robj *vmPreviewObject(robj *o);
-int vmSwapOneObjectBlocking(void);
-int vmSwapOneObjectThreaded(void);
-int vmCanSwapOut(void);
+void dsInit(void);
void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask);
-void vmCancelThreadedIOJob(robj *o);
void lockThreadedIO(void);
void unlockThreadedIO(void);
-int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db);
void freeIOJob(iojob *j);
void queueIOJob(iojob *j);
-int vmWriteObjectOnSwap(robj *o, off_t page);
-robj *vmReadObjectFromSwap(off_t page, int type);
void waitEmptyIOJobsQueue(void);
-void vmReopenSwapFile(void);
-int vmFreePage(off_t page);
void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd);
int dontWaitForSwappedKey(redisClient *c, robj *key);
void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
-vmpointer *vmSwapObjectBlocking(robj *val);
+int cacheFreeOneEntry(void);
/* Set data type */
robj *setTypeCreate(robj *value);
int dbDelete(redisDb *db, robj *key);
long long emptyDb();
int selectDb(redisClient *c, int id);
+void signalModifiedKey(redisDb *db, robj *key);
+void signalFlushedDb(int dbid);
/* Git SHA1 */
char *redisGitSHA1(void);
* SORT result is empty a new key is set and maybe the old content
* replaced. */
server.dirty += 1+outputlen;
- touchWatchedKey(c->db,storekey);
+ signalModifiedKey(c->db,storekey);
addReplyLongLong(c,outputlen);
}
hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
update = hashTypeSet(o,c->argv[2],c->argv[3]);
addReply(c, update ? shared.czero : shared.cone);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
hashTypeSet(o,c->argv[2],c->argv[3]);
addReply(c, shared.cone);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
}
hashTypeSet(o,c->argv[i],c->argv[i+1]);
}
addReply(c, shared.ok);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
hashTypeSet(o,c->argv[2],new);
decrRefCount(new);
addReplyLongLong(c,value);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
if (hashTypeDelete(o,c->argv[2])) {
if (hashTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
addReply(c,shared.cone);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
} else {
addReply(c,shared.czero);
return;
}
if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
addReply(c,shared.cone);
return;
}
}
listTypePush(lobj,c->argv[2],where);
addReplyLongLong(c,listTypeLength(lobj));
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
} else {
/* Notify client of a failed insert */
}
} else {
listTypePush(subject,val,where);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
decrRefCount(value);
addReply(c,shared.ok);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
} else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
listNodeValue(ln) = value;
incrRefCount(value);
addReply(c,shared.ok);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
} else {
addReplyBulk(c,value);
decrRefCount(value);
if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
}
redisPanic("Unknown list encoding");
}
if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.ok);
}
if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
addReplyLongLong(c,removed);
- if (removed) touchWatchedKey(c->db,c->argv[1]);
+ if (removed) signalModifiedKey(c->db,c->argv[1]);
}
/* This is the semantic of this command:
dstobj = createZiplistObject();
dbAdd(c->db,dstkey,dstobj);
} else {
- touchWatchedKey(c->db,dstkey);
+ signalModifiedKey(c->db,dstkey);
server.dirty++;
}
listTypePush(dstobj,value,REDIS_HEAD);
/* Delete the source list when it is empty */
if (listTypeLength(sobj) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
}
}
}
if (setTypeAdd(set,c->argv[2])) {
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.cone);
} else {
c->argv[2] = tryObjectEncoding(c->argv[2]);
if (setTypeRemove(set,c->argv[2])) {
if (setTypeSize(set) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.cone);
} else {
/* Remove the src set from the database when empty */
if (setTypeSize(srcset) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[2]);
+ signalModifiedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[2]);
server.dirty++;
/* Create the destination set when it doesn't exist */
setTypeRemove(set,ele);
}
if (setTypeSize(set) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
zfree(sets);
if (dstkey) {
if (dbDelete(c->db,dstkey)) {
- touchWatchedKey(c->db,dstkey);
+ signalModifiedKey(c->db,dstkey);
server.dirty++;
}
addReply(c,shared.czero);
decrRefCount(dstset);
addReply(c,shared.czero);
}
- touchWatchedKey(c->db,dstkey);
+ signalModifiedKey(c->db,dstkey);
server.dirty++;
} else {
setDeferredMultiBulkLength(c,replylen,cardinality);
decrRefCount(dstset);
addReply(c,shared.czero);
}
- touchWatchedKey(c->db,dstkey);
+ signalModifiedKey(c->db,dstkey);
server.dirty++;
}
zfree(sets);
} else {
incrRefCount(val);
}
- touchWatchedKey(c->db,key);
+ signalModifiedKey(c->db,key);
server.dirty++;
removeExpire(c->db,key);
if (expire) setExpire(c->db,key,time(NULL)+seconds);
c->argv[2] = tryObjectEncoding(c->argv[2]);
dbReplace(c->db,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
removeExpire(c->db,c->argv[1]);
}
byteval &= ~(1 << bit);
byteval |= ((on & 0x1) << bit);
((char*)o->ptr)[byte] = byteval;
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c, bitval ? shared.cone : shared.czero);
}
if (sdslen(value) > 0) {
o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value));
memcpy((char*)o->ptr+offset,value,sdslen(value));
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
addReplyLongLong(c,sdslen(o->ptr));
dbReplace(c->db,c->argv[j],c->argv[j+1]);
incrRefCount(c->argv[j+1]);
removeExpire(c->db,c->argv[j]);
- touchWatchedKey(c->db,c->argv[j]);
+ signalModifiedKey(c->db,c->argv[j]);
}
server.dirty += (c->argc-1)/2;
addReply(c, nx ? shared.cone : shared.ok);
}
o = createStringObjectFromLongLong(value);
dbReplace(c->db,c->argv[1],o);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.colon);
addReply(c,o);
o->ptr = sdscatlen(o->ptr,append->ptr,sdslen(append->ptr));
totlen = sdslen(o->ptr);
}
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReplyLongLong(c,totlen);
}
de = dictFind(zs->dict,ele);
redisAssert(de != NULL);
dictGetEntryVal(de) = &znode->score;
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
if (incr)
addReplyDouble(c,score);
/* Update the score in the current dict entry */
dictGetEntryVal(de) = &znode->score;
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
if (incr)
dictDelete(zs->dict,c->argv[2]);
if (htNeedsResize(zs->dict)) dictResize(zs->dict);
if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.cone);
}
deleted = zslDeleteRangeByScore(zs->zsl,range,zs->dict);
if (htNeedsResize(zs->dict)) dictResize(zs->dict);
if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
- if (deleted) touchWatchedKey(c->db,c->argv[1]);
+ if (deleted) signalModifiedKey(c->db,c->argv[1]);
server.dirty += deleted;
addReplyLongLong(c,deleted);
}
deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict);
if (htNeedsResize(zs->dict)) dictResize(zs->dict);
if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
- if (deleted) touchWatchedKey(c->db,c->argv[1]);
+ if (deleted) signalModifiedKey(c->db,c->argv[1]);
server.dirty += deleted;
addReplyLongLong(c, deleted);
}
}
if (dbDelete(c->db,dstkey)) {
- touchWatchedKey(c->db,dstkey);
+ signalModifiedKey(c->db,dstkey);
touched = 1;
server.dirty++;
}
if (dstzset->zsl->length) {
dbAdd(c->db,dstkey,dstobj);
addReplyLongLong(c, dstzset->zsl->length);
- if (!touched) touchWatchedKey(c->db,dstkey);
+ if (!touched) signalModifiedKey(c->db,dstkey);
server.dirty++;
} else {
decrRefCount(dstobj);