#include "redis.h"
+#include "endianconv.h"
+
+#include <sys/stat.h>
+#include <lmdb.h>
/* -----------------------------------------------------------------------------
- * RESTORE and MIGRATE commands
+ * DUMP, RESTORE and MIGRATE commands
* -------------------------------------------------------------------------- */
+MDB_env *env; /* LMDB environment of the key archive; NULL while the archive is stopped. */
+MDB_dbi dbi; /* Handle of the database opened inside 'env' by startKeyArchive(). */
+
+/* Build the DUMP-format serialization of the object 'o' into 'payload'.
+ *
+ * 'payload' is initialized here over a fresh, empty sds buffer. On return
+ * payload->io.buffer.ptr holds the whole dump, laid out as follows:
+ *
+ * ----------------+---------------------+---------------+
+ * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 |
+ * ----------------+---------------------+---------------+
+ *
+ * The RDB payload is an object type byte followed by the serialized object,
+ * which is what RESTORE understands. RDB version and CRC64 are both stored
+ * in little endian; the CRC64 covers every byte that precedes it, version
+ * bytes included.
+ *
+ * There is no error return: the serialization steps are guarded by
+ * redisAssert(). */
+void createDumpPayload(rio *payload, robj *o) {
+    unsigned char version[2];
+    uint64_t checksum;
+    sds dump;
+
+    /* Type byte + serialized object, written through the rio interface. */
+    rioInitWithBuffer(payload,sdsempty());
+    redisAssert(rdbSaveObjectType(payload,o));
+    redisAssert(rdbSaveObject(payload,o));
+
+    /* Footer, part one: RDB version, least significant byte first. */
+    version[0] = REDIS_RDB_VERSION & 0xff;
+    version[1] = (REDIS_RDB_VERSION >> 8) & 0xff;
+    dump = sdscatlen(payload->io.buffer.ptr,version,2);
+
+    /* Footer, part two: CRC64 of everything written so far, byte-swapped on
+     * big endian hosts so that the stored form is always little endian. */
+    checksum = crc64(0,(unsigned char*)dump,sdslen(dump));
+    memrev64ifbe(&checksum);
+
+    /* sdscatlen() may move the buffer: publish the final pointer. */
+    payload->io.buffer.ptr = sdscatlen(dump,&checksum,8);
+}
+
+/* Validate the footer of the DUMP payload 'p', which is 'len' bytes long.
+ *
+ * The last 10 bytes are expected to be a 2 byte little endian RDB version
+ * followed by an 8 byte little endian CRC64 computed over everything that
+ * comes before the CRC (version bytes included).
+ *
+ * Returns REDIS_OK when the version equals REDIS_RDB_VERSION and the checksum
+ * matches, REDIS_ERR otherwise (including when the payload is too short to
+ * contain a footer at all). */
+int verifyDumpPayload(unsigned char *p, size_t len) {
+    const size_t footerlen = 10; /* 2 bytes of version + 8 bytes of CRC64. */
+    unsigned char *trailer;
+    uint16_t version;
+    uint64_t computed;
+
+    if (len < footerlen) return REDIS_ERR;
+    trailer = p + (len - footerlen);
+
+    /* The version is stored least significant byte first. */
+    version = trailer[0] | (trailer[1] << 8);
+    if (version != REDIS_RDB_VERSION) return REDIS_ERR;
+
+    /* Recompute the CRC64 over payload + version and compare it with the
+     * stored one in its serialized (little endian) form. */
+    computed = crc64(0,p,len-8);
+    memrev64ifbe(&computed);
+    if (memcmp(&computed,trailer+2,8) != 0) return REDIS_ERR;
+
+    return REDIS_OK;
+}
+
+/* DUMP keyname
+ *
+ * Replies with the DUMP-format serialization of the value stored at the key
+ * given as first argument, or with a null bulk reply when the key does not
+ * exist.
+ *
+ * DUMP is actually not used by Redis Cluster but it is the obvious
+ * complement of RESTORE and can be useful for different applications. */
+void dumpCommand(redisClient *c) {
+    rio dump;
+    robj *reply;
+    robj *value = lookupKeyRead(c->db,c->argv[1]);
+
+    /* Missing key: nothing to serialize. */
+    if (value == NULL) {
+        addReply(c,shared.nullbulk);
+        return;
+    }
+
+    /* Serialize the value, then wrap the resulting buffer into a string
+     * object so it can be sent as a bulk reply. The buffer is handed to the
+     * object, so it is not freed separately here. */
+    createDumpPayload(&dump,value);
+    reply = createObject(REDIS_STRING,dump.io.buffer.ptr);
+    addReplyBulk(c,reply);
+    decrRefCount(reply);
+}
+
/* RESTORE key ttl serialized-value */
void restoreCommand(redisClient *c) {
long ttl;
return;
}
+ /* Verify RDB version and data checksum. */
+ if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR) {
+ addReplyError(c,"DUMP payload version or checksum are wrong");
+ return;
+ }
+
rioInitWithBuffer(&payload,c->argv[3]->ptr);
if (((type = rdbLoadObjectType(&payload)) == -1) ||
((obj = rdbLoadObject(type,&payload)) == NULL))
/* Create the key and set the TTL if any */
dbAdd(c->db,c->argv[1],obj);
- if (ttl) setExpire(c->db,c->argv[1],time(NULL)+ttl);
+ if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl);
signalModifiedKey(c->db,c->argv[1]);
addReply(c,shared.ok);
server.dirty++;
int fd;
long timeout;
long dbid;
- time_t ttl;
+ long long ttl = 0, expireat;
robj *o;
rio cmd, payload;
return;
if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK)
return;
- if (timeout <= 0) timeout = 1;
+ if (timeout <= 0) timeout = 1000;
/* Check if the key is here. If not we reply with success as there is
* nothing to migrate (for instance the key expired in the meantime), but
server.neterr);
return;
}
- if ((aeWait(fd,AE_WRITABLE,timeout*1000) & AE_WRITABLE) == 0) {
- addReplyError(c,"Timeout connecting to the client");
+ if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) {
+ close(fd);
+ addReplySds(c,sdsnew("-IOERR error or timeout connecting to the client\r\n"));
return;
}
+ /* Create RESTORE payload and generate the protocol to call the command. */
rioInitWithBuffer(&cmd,sdsempty());
redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
- ttl = getExpire(c->db,c->argv[3]);
+ expireat = getExpire(c->db,c->argv[3]);
+ if (expireat != -1) {
+ ttl = expireat-mstime();
+ if (ttl < 1) ttl = 1;
+ }
redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',4));
redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
redisAssertWithInfo(c,NULL,c->argv[3]->encoding == REDIS_ENCODING_RAW);
redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr)));
- redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,(ttl == -1) ? 0 : ttl));
+ redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));
/* Finally the last argument that is the serailized object payload
- * in the form: <type><rdb-serialized-object>. */
- rioInitWithBuffer(&payload,sdsempty());
- redisAssertWithInfo(c,NULL,rdbSaveObjectType(&payload,o));
- redisAssertWithInfo(c,NULL,rdbSaveObject(&payload,o) != -1);
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr,sdslen(payload.io.buffer.ptr)));
+ * in the DUMP format. */
+ createDumpPayload(&payload,o);
+ redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr,
+ sdslen(payload.io.buffer.ptr)));
sdsfree(payload.io.buffer.ptr);
/* Tranfer the query to the other node in 64K chunks. */
while ((towrite = sdslen(buf)-pos) > 0) {
towrite = (towrite > (64*1024) ? (64*1024) : towrite);
- nwritten = syncWrite(fd,buf+nwritten,towrite,timeout);
+ nwritten = syncWrite(fd,buf+pos,towrite,timeout);
if (nwritten != (signed)towrite) goto socket_wr_err;
pos += nwritten;
}
return;
socket_wr_err:
- redisLog(REDIS_NOTICE,"Can't write to target node for MIGRATE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"MIGRATE failed, writing to target node: %s.",
- strerror(errno));
+ addReplySds(c,sdsnew("-IOERR error or timeout writing to target instance\r\n"));
sdsfree(cmd.io.buffer.ptr);
close(fd);
return;
socket_rd_err:
- redisLog(REDIS_NOTICE,"Can't read from target node for MIGRATE: %s",
- strerror(errno));
- addReplyErrorFormat(c,"MIGRATE failed, reading from target node: %s.",
- strerror(errno));
+ addReplySds(c,sdsnew("-IOERR error or timeout reading from target node\r\n"));
sdsfree(cmd.io.buffer.ptr);
close(fd);
return;
}
-/* DUMP keyname
- * DUMP is actually not used by Redis Cluster but it is the obvious
- * complement of RESTORE and can be useful for different applications. */
-void dumpCommand(redisClient *c) {
- robj *o, *dumpobj;
+/* Shut down the key archive.
+ *
+ * Asserts that the LMDB environment is currently open, marks the archive as
+ * off in the server state, closes the database handle and the environment,
+ * and resets the global 'env' to NULL so that startKeyArchive() can be
+ * called again. */
+void stopKeyArchive(void) {
+    redisAssert(env != NULL);
+
+    /* Flag the archive as unavailable, then release the LMDB handles: the
+     * database handle first, the environment that owns it afterwards. */
+    server.mdb_state = REDIS_MDB_OFF;
+
+    mdb_dbi_close(env, dbi);
+    mdb_env_close(env);
+    env = NULL;
+}
+
+/* Open the LMDB environment used as key archive.
+ *
+ * Creates the environment, applies the configured map size, creates the
+ * environment directory if needed, opens the environment and its unnamed
+ * database, and finally marks the archive as on in the server state.
+ *
+ * Must be called while the archive is stopped (asserted via 'env').
+ *
+ * Returns 0 on success, otherwise the non-zero error code reported by the
+ * failing LMDB call. On failure the environment is closed and 'env' is reset
+ * to NULL, so nothing is leaked and the function can be called again. */
+int startKeyArchive(void) {
+    MDB_txn *txn;
+    int ret;
+
+    redisAssert(env == NULL);
+
+    ret = mdb_env_create(&env);
+    if (ret != 0) {
+        /* Nothing was handed back to us: make sure the global says so. */
+        env = NULL;
+        return ret;
+    }
+
+    ret = mdb_env_set_mapsize(env, server.mdb_mapsize);
+    if (ret != 0) goto start_err;
+
+    ret = mdb_env_set_maxdbs(env, 1);
+    if (ret != 0) goto start_err;
+
+    /* Best effort: the result is intentionally ignored (the directory may
+     * already exist). Any real problem with the path is reported by
+     * mdb_env_open() right below. */
+    mkdir(server.mdb_environment, 0755);
+
+    ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0644);
+    if (ret != 0) goto start_err;
+
+    /* The database handle has to be opened inside a transaction. */
+    ret = mdb_txn_begin(env, NULL, 0, &txn);
+    if (ret != 0) goto start_err;
+
+    ret = mdb_dbi_open(txn, NULL, 0, &dbi);
+    if (ret != 0) {
+        mdb_txn_abort(txn);
+        goto start_err;
+    }
+
+    /* The transaction handle is released by the commit whether it succeeds
+     * or not, so no abort is needed on this error path. */
+    ret = mdb_txn_commit(txn);
+    if (ret != 0) goto start_err;
+
+    server.mdb_state = REDIS_MDB_ON;
+    return 0;
+
+start_err:
+    /* A created environment must always be discarded with mdb_env_close(),
+     * even when it was never successfully opened. */
+    mdb_env_close(env);
+    env = NULL;
+    return ret;
+}
+
+/* Store the DUMP-format serialization of the value of 'key' (looked up in
+ * 'db') into the LMDB archive, using the key name as LMDB key.
+ *
+ * Returns 1 when there is nothing left to do for this key: the archive is
+ * off, the object is already flagged as archived, or the value was written
+ * and committed. Returns 0 when the key does not exist in 'db'.
+ *
+ * Any LMDB failure (begin, put or commit) ends up in archive_err, which
+ * releases the payload and then hits redisAssert(0).
+ *
+ * NOTE(review): this function does not set object->archived after a
+ * successful write; presumably the caller does -- confirm. */
+int archive(redisDb *db, robj *key) {
+    if (server.mdb_state == REDIS_MDB_OFF)
+        return 1;
+    redisAssert(env != NULL);
+
+    MDB_val kval;
+    kval.mv_data = key->ptr;
+    kval.mv_size = sdslen((sds)key->ptr);
+
+    robj *object;
+    object = lookupKey(db, key);
+    if (object == NULL)
+        return 0;
+
+    /* Objects flagged as archived are not written again. */
+    if (object->archived != 0)
+        return 1;
+
rio payload;
+    createDumpPayload(&payload, object);
- /* Check if the key is here. */
- if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
- addReply(c,shared.nullbulk);
- return;
+    /* The serialized buffer is owned by this function from here on and is
+     * released on every exit path below. */
+    MDB_val dval;
+    dval.mv_size = sdslen(payload.io.buffer.ptr);
+    dval.mv_data = payload.io.buffer.ptr;
+
+    int ret;
+
+    MDB_txn *txn;
+    ret = mdb_txn_begin(env, NULL, 0, &txn);
+    if (ret != 0)
+        goto archive_err;
+
+    ret = mdb_put(txn, dbi, &kval, &dval, 0);
+    if (ret != 0) {
+        mdb_txn_abort(txn);
+        goto archive_err;
}
- /* Serialize the object in a RDB-like format. It consist of an object type
- * byte followed by the serialized object. This is understood by RESTORE. */
- rioInitWithBuffer(&payload,sdsempty());
- redisAssertWithInfo(c,NULL,rdbSaveObjectType(&payload,o));
- redisAssertWithInfo(c,NULL,rdbSaveObject(&payload,o));
+    /* A failed commit means the value was NOT stored: treat it like any
+     * other archive error instead of reporting success. The transaction
+     * handle is released by the commit in both cases. */
+    ret = mdb_txn_commit(txn);
+    if (ret != 0)
+        goto archive_err;
+    sdsfree(payload.io.buffer.ptr);
+    return 1;
- /* Transfer to the client */
- dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr);
- addReplyBulk(c,dumpobj);
- decrRefCount(dumpobj);
- return;
+archive_err:
+    sdsfree(payload.io.buffer.ptr);
+    redisAssert(0);
+    return 0;
+}
+
+/* Try to load 'key' back from the LMDB archive into 'db'.
+ *
+ * The stored value is expected to be a DUMP-format payload, as written by
+ * archive(). Its footer (RDB version and CRC64) is verified before the
+ * payload is deserialized, the same check RESTORE applies to its input.
+ *
+ * On success the loaded object is flagged as archived, added to 'db' under
+ * 'key', the key is signaled as modified, server.dirty is incremented and
+ * the object is returned.
+ *
+ * Returns NULL when the archive is off, when the key is not in the archive,
+ * on any LMDB error, or when the stored payload fails verification or cannot
+ * be deserialized. */
+robj *recover(redisDb *db, robj *key) {
+    if (server.mdb_state == REDIS_MDB_OFF)
+        return NULL;
+    redisAssert(env != NULL);
+
+    MDB_val kval;
+    kval.mv_data = key->ptr;
+    kval.mv_size = sdslen((sds)key->ptr);
+
+    /* This is a pure lookup: a read-only transaction is all that is needed. */
+    MDB_txn *txn;
+    if (mdb_txn_begin(env, NULL, MDB_RDONLY, &txn) != 0)
+        return NULL;
+
+    MDB_val pval;
+    if (mdb_get(txn, dbi, &kval, &pval) != 0) {
+        mdb_txn_abort(txn);
+        return NULL;
+    }
+
+    /* The returned data is only valid while the transaction is alive: take a
+     * private copy before ending it. */
+    sds sval = sdsnewlen(pval.mv_data, pval.mv_size);
+    mdb_txn_abort(txn);
+
+    /* Never feed an unverified buffer to the RDB loader. */
+    robj *object = NULL;
+    if (verifyDumpPayload((unsigned char*)sval, sdslen(sval)) == REDIS_OK) {
+        rio payload;
+        rioInitWithBuffer(&payload, sval);
+
+        int type = rdbLoadObjectType(&payload);
+        if (type != -1)
+            object = rdbLoadObject(type, &payload);
+    }
+    sdsfree(sval);
+
+    if (object == NULL)
+        return NULL;
+
+    /* The value comes straight from the archive, so it is already stored. */
+    object->archived = 1;
+
+    dbAdd(db, key, object);
+    signalModifiedKey(db, key);
+    server.dirty++;
+
+    return object;
+}
+
+/* Remove 'key' from the LMDB archive.
+ *
+ * Does nothing when the archive is off. Failures are not reported to the
+ * caller: if the transaction cannot be started the function just returns,
+ * and if the delete fails the transaction is aborted. */
+void purge(robj *key) {
+    MDB_txn *txn;
+    MDB_val kval;
+
+    if (server.mdb_state == REDIS_MDB_OFF)
+        return;
+
+    /* The key name is used as LMDB key. */
+    kval.mv_size = sdslen((sds)key->ptr);
+    kval.mv_data = key->ptr;
+
+    if (mdb_txn_begin(env, NULL, 0, &txn) != 0)
+        return;
+
+    /* Commit only when the delete went through, otherwise roll back. */
+    if (mdb_del(txn, dbi, &kval, NULL) == 0)
+        mdb_txn_commit(txn);
+    else
+        mdb_txn_abort(txn);
}