From: Jay Freeman (saurik) Date: Sun, 23 Dec 2012 07:34:59 +0000 (+0000) Subject: Archive (do not delete) keys due to memory limits. X-Git-Url: https://git.saurik.com/redis.git/commitdiff_plain/8e2a225aa6cd1848a282d3d96742759cc24fc7e8?ds=sidebyside Archive (do not delete) keys due to memory limits. (This archival is done using OpenLDAP's MDB.) --- diff --git a/redis.conf b/redis.conf index 97aea334..0920fe73 100644 --- a/redis.conf +++ b/redis.conf @@ -311,6 +311,26 @@ slave-priority 100 # # maxmemory-samples 3 +################################# MDB ARCHIVAL ################################ + +# When keys are deleted due to the memory limit, they are forever lost. In +# some situations, it is valuable to instead "archive" them by storing them +# in another database, even if that database is slower than Redis. Turning +# on keyarchive will store these keys to OpenLDAP's MDB, a very fast embedded +# key/value storage system. When keys are next used, they will be recovered +# back into Redis; further edits will not be saved back to the key archival +# system until the key is again spilled to disk. + +keyarchive no + +# The directory in which the database files will reside. +# +# mdb-environment archive + +# Set the size of the memory map to use for this environment. +# +# mdb-mapsize 10485760 + ############################## APPEND ONLY MODE ############################### # By default Redis asynchronously dumps the dataset on disk. 
This mode is diff --git a/src/Makefile b/src/Makefile index 4049442b..daf9821a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -58,7 +58,7 @@ else endif # Include paths to dependencies -FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src +FINAL_CFLAGS+= -I../deps/hiredis -I../deps/liblmdb -I../deps/linenoise -I../deps/lua/src ifeq ($(MALLOC),tcmalloc) FINAL_CFLAGS+= -DUSE_TCMALLOC @@ -76,6 +76,8 @@ ifeq ($(MALLOC),jemalloc) FINAL_LIBS+= ../deps/jemalloc/lib/libjemalloc.a -ldl endif +FINAL_LIBS+= ../deps/liblmdb/liblmdb.a + REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS) REDIS_LD=$(QUIET_LINK)$(CC) $(FINAL_LDFLAGS) REDIS_INSTALL=$(QUIET_INSTALL)$(INSTALL) diff --git a/src/config.c b/src/config.c index 7e8f8a2b..c4e45c8b 100644 --- a/src/config.c +++ b/src/config.c @@ -256,6 +256,18 @@ void loadServerConfigFromString(char *config) { if ((server.daemonize = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"keyarchive") && argc == 2) { + int yes; + + if ((yes = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } + server.mdb_state = yes ? 
REDIS_MDB_ON : REDIS_MDB_OFF; + } else if (!strcasecmp(argv[0],"mdb-environment") && argc == 2) { + zfree(server.mdb_environment); + server.mdb_environment = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"mdb-mapsize") && argc == 2) { + server.mdb_mapsize = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) { int yes; diff --git a/src/db.c b/src/db.c index 59479574..ad6e7114 100644 --- a/src/db.c +++ b/src/db.c @@ -51,7 +51,7 @@ robj *lookupKey(redisDb *db, robj *key) { val->lru = server.lruclock; return val; } else { - return NULL; + return recover(db, key); } } @@ -69,7 +69,9 @@ robj *lookupKeyRead(redisDb *db, robj *key) { robj *lookupKeyWrite(redisDb *db, robj *key) { expireIfNeeded(db,key); - return lookupKey(db,key); + robj *val = lookupKey(db,key); + if (val) val->archived = 0; + return val; } robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) { diff --git a/src/migrate.c b/src/migrate.c index 1dc9a753..a00f0e63 100644 --- a/src/migrate.c +++ b/src/migrate.c @@ -1,10 +1,16 @@ #include "redis.h" #include "endianconv.h" +#include +#include + /* ----------------------------------------------------------------------------- * DUMP, RESTORE and MIGRATE commands * -------------------------------------------------------------------------- */ +MDB_env *env; +MDB_dbi dbi; + /* Generates a DUMP-format representation of the object 'o', adding it to the * io stream pointed by 'rio'. This function can't fail. 
*/ void createDumpPayload(rio *payload, robj *o) { @@ -245,3 +251,141 @@ socket_rd_err: close(fd); return; } + +void mdb(void) { + int ret; + + if (env != NULL) + return; + + ret = mdb_env_create(&env); + redisAssert(ret == 0); + + ret = mdb_env_set_mapsize(env, server.mdb_mapsize); + redisAssert(ret == 0); + + ret = mdb_env_set_maxdbs(env, 1); + redisAssert(ret == 0); + + mkdir(server.mdb_environment, 0644); + + ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0664); + redisAssert(ret == 0); + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + redisAssert(ret == 0); + + ret = mdb_open(txn, NULL, 0, &dbi); + redisAssert(ret == 0); + + mdb_txn_commit(txn); +} + +int archive(redisDb *db, robj *key) { + if (server.mdb_state == REDIS_MDB_OFF) + return 1; + + mdb(); + + MDB_val kval; + kval.mv_data = key->ptr; + kval.mv_size = sdslen((sds)key->ptr); + + robj *object; + object = lookupKey(db, key); + if (object == NULL) + return 0; + + if (object->archived != 0) + return 0; + + rio payload; + createDumpPayload(&payload, object); + + MDB_val dval; + dval.mv_size = sdslen(payload.io.buffer.ptr); + dval.mv_data = payload.io.buffer.ptr; + + int ret; + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + if (ret != 0) + goto archive_err; + + ret = mdb_put(txn, dbi, &kval, &dval, 0); + if (ret != 0) { + mdb_txn_abort(txn); + goto archive_err; + } + + mdb_txn_commit(txn); + sdsfree(payload.io.buffer.ptr); + return 1; + +archive_err: + sdsfree(payload.io.buffer.ptr); + redisAssert(0); + return 0; +} + +robj *recover(redisDb *db, robj *key) { + if (server.mdb_state == REDIS_MDB_OFF) + return NULL; + + int ret; + + mdb(); + + MDB_val kval; + kval.mv_data = key->ptr; + kval.mv_size = sdslen((sds)key->ptr); + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + if (ret != 0) + return NULL; + + MDB_cursor *cursor; + ret = mdb_cursor_open(txn, dbi, &cursor); + if (ret != 0) { + mdb_txn_abort(txn); + return NULL; + } + + MDB_val pval; 
+ ret = mdb_cursor_get(cursor, &kval, &pval, MDB_SET); + if (ret != 0) { + mdb_txn_abort(txn); + return NULL; + } + + sds sval = sdsnewlen(pval.mv_data, pval.mv_size); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + rio payload; + rioInitWithBuffer(&payload, sval); + + int type = rdbLoadObjectType(&payload); + if (type == -1) + goto recover_err; + + robj *object = rdbLoadObject(type, &payload); + if (object == NULL) + goto recover_err; + + object->archived = 1; + + dbAdd(db, key, object); + signalModifiedKey(db, key); + server.dirty++; + + sdsfree(sval); + return object; + +recover_err: + sdsfree(sval); + return NULL; +} diff --git a/src/object.c b/src/object.c index c2b89709..38987e26 100644 --- a/src/object.c +++ b/src/object.c @@ -35,6 +35,7 @@ robj *createObject(int type, void *ptr) { robj *o = zmalloc(sizeof(*o)); o->type = type; + o->archived = 0; o->encoding = REDIS_ENCODING_RAW; o->ptr = ptr; o->refcount = 1; diff --git a/src/redis.c b/src/redis.c index eb4418e7..b1c34ebb 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1119,6 +1119,9 @@ void initServerConfig() { server.pidfile = zstrdup("/var/run/redis.pid"); server.rdb_filename = zstrdup("dump.rdb"); server.aof_filename = zstrdup("appendonly.aof"); + server.mdb_state = REDIS_MDB_OFF; + server.mdb_environment = zstrdup("archive"); + server.mdb_mapsize = 10485760; server.requirepass = NULL; server.rdb_compression = 1; server.rdb_checksum = 1; @@ -2337,6 +2340,8 @@ int freeMemoryIfNeeded(void) { long long delta; robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); + int archived = archive(db, keyobj); + redisAssert(archived != 0); propagateExpire(db,keyobj); /* We compute the amount of memory freed by dbDelete() alone. 
* It is possible that actually the memory needed to propagate diff --git a/src/redis.h b/src/redis.h index 071cb534..6fe6cfc6 100644 --- a/src/redis.h +++ b/src/redis.h @@ -163,6 +163,10 @@ #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */ #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */ +/* MDB states */ +#define REDIS_MDB_OFF 0 /* MDB is off */ +#define REDIS_MDB_ON 1 /* MDB is on */ + /* AOF states */ #define REDIS_AOF_OFF 0 /* AOF is off */ #define REDIS_AOF_ON 1 /* AOF is on */ @@ -310,7 +314,8 @@ #define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */ typedef struct redisObject { unsigned type:4; - unsigned notused:2; /* Not used */ + unsigned archived:1; + unsigned notused:1; /* Not used */ unsigned encoding:4; unsigned lru:22; /* lru time (relative to server.lruclock) */ int refcount; @@ -589,6 +594,10 @@ struct redisServer { time_t rdb_save_time_start; /* Current RDB save start time. */ int lastbgsave_status; /* REDIS_OK or REDIS_ERR */ int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ + /* MDB archival */ + int mdb_state; /* REDIS_MDB_(ON|OFF) */ + char *mdb_environment; /* Name of the MDB file */ + size_t mdb_mapsize; /* Map size for use with MDB */ /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ @@ -1024,6 +1033,10 @@ void signalModifiedKey(redisDb *db, robj *key); void signalFlushedDb(int dbid); unsigned int GetKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count); +/* external database archival */ +robj *recover(redisDb *db, robj *key); +int archive(redisDb *db, robj *key); + /* API to get key arguments from commands */ #define REDIS_GETKEYS_ALL 0 #define REDIS_GETKEYS_PRELOAD 1