#
# maxmemory-samples 3
+################################# MDB ARCHIVAL ################################
+
+# When keys are deleted due to the memory limit, they are forever lost. In
+# some situations, it is valuable to instead "archive" them by storing them
+# in another database, even if that database is slower than Redis. Turning
+# on keyarchive will store these keys to OpenLDAP's MDB, a very fast embedded
+# key/value storage system. When keys are next used, they will be recovered
+# back into Redis; further edits will not be saved back to the key archival
+# system until the key is again spilled to disk.
+
+keyarchive no
+
+# The directory in which the database files will reside.
+#
+# mdb-environment archive
+
+# Set the size of the memory map to use for this environment.
+#
+# mdb-mapsize 10485760
+
############################## APPEND ONLY MODE ###############################
# By default Redis asynchronously dumps the dataset on disk. This mode is
endif
# Include paths to dependencies
-FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src
+FINAL_CFLAGS+= -I../deps/hiredis -I../deps/liblmdb -I../deps/linenoise -I../deps/lua/src
ifeq ($(MALLOC),tcmalloc)
FINAL_CFLAGS+= -DUSE_TCMALLOC
FINAL_LIBS+= ../deps/jemalloc/lib/libjemalloc.a -ldl
endif
+FINAL_LIBS+= ../deps/liblmdb/liblmdb.a
+
REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS)
REDIS_LD=$(QUIET_LINK)$(CC) $(FINAL_LDFLAGS)
REDIS_INSTALL=$(QUIET_INSTALL)$(INSTALL)
if ((server.daemonize = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"keyarchive") && argc == 2) {
+ int yes;
+
+ if ((yes = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ server.mdb_state = yes ? REDIS_MDB_ON : REDIS_MDB_OFF;
+ } else if (!strcasecmp(argv[0],"mdb-environment") && argc == 2) {
+ zfree(server.mdb_environment);
+ server.mdb_environment = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"mdb-mapsize") && argc == 2) {
+ server.mdb_mapsize = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
int yes;
val->lru = server.lruclock;
return val;
} else {
- return NULL;
+ return recover(db, key);
}
}
+/* Look up 'key' in 'db' on behalf of a write operation: run the expire check
+ * first, then return whatever lookupKey() returns for the key. */
robj *lookupKeyWrite(redisDb *db, robj *key) {
expireIfNeeded(db,key);
- return lookupKey(db,key);
+ robj *val = lookupKey(db,key);
+ /* Clear the archived flag on a value handed out for writing: archive()
+  * does not store objects whose flag is still set, so the flag must be
+  * reset for the value to be written to the archive again. */
+ if (val) val->archived = 0;
+ return val;
}
robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
#include "redis.h"
#include "endianconv.h"
+#include <sys/stat.h>
+#include <lmdb.h>
+
/* -----------------------------------------------------------------------------
* DUMP, RESTORE and MIGRATE commands
* -------------------------------------------------------------------------- */
+/* LMDB environment handle used for key archival. It stays NULL until mdb()
+ * creates and opens the environment. */
+MDB_env *env;
+/* Handle of the database that mdb() opens inside 'env'. */
+MDB_dbi dbi;
+
/* Generates a DUMP-format representation of the object 'o', adding it to the
* io stream pointed by 'rio'. This function can't fail. */
void createDumpPayload(rio *payload, robj *o) {
close(fd);
return;
}
+
+/* Open the LMDB environment used for key archival, at most once.
+ *
+ * Does nothing when 'env' is already set. Otherwise it creates the
+ * environment, applies server.mdb_mapsize, makes sure the directory named by
+ * server.mdb_environment exists, opens the environment and its unnamed
+ * database, and leaves the resulting handles in the globals 'env' and 'dbi'.
+ *
+ * Every LMDB call is checked with redisAssert(), so a failure here does not
+ * return to the caller. */
+void mdb(void) {
+    int ret;
+    MDB_txn *txn;
+
+    if (env != NULL)
+        return;
+
+    ret = mdb_env_create(&env);
+    redisAssert(ret == 0);
+
+    ret = mdb_env_set_mapsize(env, server.mdb_mapsize);
+    redisAssert(ret == 0);
+
+    ret = mdb_env_set_maxdbs(env, 1);
+    redisAssert(ret == 0);
+
+    /* A directory needs the search (x) bits, otherwise files cannot be
+     * created or opened inside it; 0644 would leave it unusable for a
+     * non-root process. The result is intentionally not checked: the
+     * directory normally exists already, and any real problem surfaces in
+     * mdb_env_open() just below. */
+    mkdir(server.mdb_environment, 0755);
+
+    ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0664);
+    redisAssert(ret == 0);
+
+    /* The database handle has to be opened inside a transaction. */
+    ret = mdb_txn_begin(env, NULL, 0, &txn);
+    redisAssert(ret == 0);
+
+    ret = mdb_open(txn, NULL, 0, &dbi);
+    redisAssert(ret == 0);
+
+    /* A failed commit would leave 'dbi' unusable, so treat it like every
+     * other setup failure instead of ignoring it. */
+    ret = mdb_txn_commit(txn);
+    redisAssert(ret == 0);
+}
+
+/* Store the current value of 'key' in the MDB archive.
+ *
+ * The value is serialized with createDumpPayload() and written under the
+ * key's name in a single LMDB transaction.
+ *
+ * Returns 1 when there is nothing left to do for the caller:
+ *   - archival is disabled (server.mdb_state == REDIS_MDB_OFF),
+ *   - the object still carries the 'archived' flag, meaning the archive
+ *     already holds this value, or
+ *   - the value was written and committed.
+ * Returns 0 when lookupKey() finds no value for 'key'.
+ * A failing LMDB call trips redisAssert().
+ *
+ * Note that lookupKey() is used for the lookup, so its side effects (such as
+ * refreshing the object's LRU field) apply here as well. */
+int archive(redisDb *db, robj *key) {
+    if (server.mdb_state == REDIS_MDB_OFF)
+        return 1;
+
+    mdb();
+
+    MDB_val kval;
+    kval.mv_data = key->ptr;
+    kval.mv_size = sdslen((sds)key->ptr);
+
+    robj *object = lookupKey(db, key);
+    if (object == NULL)
+        return 0;
+
+    /* An object that is still flagged as archived is already stored. This is
+     * a successful no-op, not an error: the eviction code asserts on a
+     * non-zero result, so reporting 0 here would abort the server. */
+    if (object->archived != 0)
+        return 1;
+
+    rio payload;
+    createDumpPayload(&payload, object);
+
+    MDB_val dval;
+    dval.mv_size = sdslen(payload.io.buffer.ptr);
+    dval.mv_data = payload.io.buffer.ptr;
+
+    int ret;
+    MDB_txn *txn;
+
+    ret = mdb_txn_begin(env, NULL, 0, &txn);
+    if (ret != 0)
+        goto archive_err;
+
+    ret = mdb_put(txn, dbi, &kval, &dval, 0);
+    if (ret != 0) {
+        mdb_txn_abort(txn);
+        goto archive_err;
+    }
+
+    /* The data is only stored once the commit succeeds. The transaction
+     * handle is released by mdb_txn_commit() either way, so it must not be
+     * aborted afterwards. */
+    ret = mdb_txn_commit(txn);
+    if (ret != 0)
+        goto archive_err;
+
+    sdsfree(payload.io.buffer.ptr);
+    return 1;
+
+archive_err:
+    sdsfree(payload.io.buffer.ptr);
+    redisAssert(0);
+    return 0;
+}
+
+/* Try to bring 'key' back from the MDB archive into 'db'.
+ *
+ * The stored payload is fetched in a read-only LMDB transaction, decoded
+ * with rdbLoadObjectType()/rdbLoadObject(), flagged as archived and added to
+ * 'db' with dbAdd(). The key is then signalled as modified and server.dirty
+ * is incremented.
+ *
+ * Returns the recovered object, or NULL when archival is disabled, the
+ * transaction cannot be started, the key is not present in the archive, or
+ * the payload cannot be decoded. */
+robj *recover(redisDb *db, robj *key) {
+    if (server.mdb_state == REDIS_MDB_OFF)
+        return NULL;
+
+    mdb();
+
+    MDB_val kval;
+    kval.mv_data = key->ptr;
+    kval.mv_size = sdslen((sds)key->ptr);
+
+    /* Nothing is written here, so a read-only transaction is enough and
+     * avoids taking LMDB's writer lock on every lookup miss. */
+    MDB_txn *txn;
+    if (mdb_txn_begin(env, NULL, MDB_RDONLY, &txn) != 0)
+        return NULL;
+
+    MDB_val pval;
+    if (mdb_get(txn, dbi, &kval, &pval) != 0) {
+        mdb_txn_abort(txn);
+        return NULL;
+    }
+
+    /* LMDB owns the memory 'pval' points to, so take a private copy before
+     * the transaction is ended. */
+    sds sval = sdsnewlen(pval.mv_data, pval.mv_size);
+    mdb_txn_abort(txn);
+
+    rio payload;
+    rioInitWithBuffer(&payload, sval);
+
+    robj *object = NULL;
+    int type = rdbLoadObjectType(&payload);
+    if (type == -1)
+        goto recover_done;
+
+    object = rdbLoadObject(type, &payload);
+    if (object == NULL)
+        goto recover_done;
+
+    /* The value is identical to the archived copy until someone looks it up
+     * for writing. */
+    object->archived = 1;
+
+    dbAdd(db, key, object);
+    signalModifiedKey(db, key);
+    server.dirty++;
+
+recover_done:
+    sdsfree(sval);
+    return object;
+}
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
+ o->archived = 0;
o->encoding = REDIS_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
server.pidfile = zstrdup("/var/run/redis.pid");
server.rdb_filename = zstrdup("dump.rdb");
server.aof_filename = zstrdup("appendonly.aof");
+ server.mdb_state = REDIS_MDB_OFF;
+ server.mdb_environment = zstrdup("archive");
+ server.mdb_mapsize = 10485760;
server.requirepass = NULL;
server.rdb_compression = 1;
server.rdb_checksum = 1;
long long delta;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
+ int archived = archive(db, keyobj);
+ redisAssert(archived != 0);
propagateExpire(db,keyobj);
/* We compute the amount of memory freed by dbDelete() alone.
* It is possible that actually the memory needed to propagate
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+/* MDB states */
+#define REDIS_MDB_OFF 0 /* MDB is off */
+#define REDIS_MDB_ON 1 /* MDB is on */
+
/* AOF states */
#define REDIS_AOF_OFF 0 /* AOF is off */
#define REDIS_AOF_ON 1 /* AOF is on */
#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */
typedef struct redisObject {
unsigned type:4;
- unsigned notused:2; /* Not used */
+ unsigned archived:1;
+ unsigned notused:1; /* Not used */
unsigned encoding:4;
unsigned lru:22; /* lru time (relative to server.lruclock) */
int refcount;
time_t rdb_save_time_start; /* Current RDB save start time. */
int lastbgsave_status; /* REDIS_OK or REDIS_ERR */
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
+ /* MDB archival */
+ int mdb_state; /* REDIS_MDB_(ON|OFF) */
+ char *mdb_environment; /* Name of the MDB file */
+ size_t mdb_mapsize; /* Map size for use with MDB */
/* Propagation of commands in AOF / replication */
redisOpArray also_propagate; /* Additional command to propagate. */
/* Logging */
void signalFlushedDb(int dbid);
unsigned int GetKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
+/* external database archival */
+robj *recover(redisDb *db, robj *key);
+int archive(redisDb *db, robj *key);
+
/* API to get key arguments from commands */
#define REDIS_GETKEYS_ALL 0
#define REDIS_GETKEYS_PRELOAD 1