X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/571e257db12eaa6cdd47811f5663ac1003e32b1b..64b2834657e4c215dc0489417177a37c80d9ac46:/src/migrate.c diff --git a/src/migrate.c b/src/migrate.c index f7a5b730..6d8aad7f 100644 --- a/src/migrate.c +++ b/src/migrate.c @@ -1,9 +1,93 @@ #include "redis.h" +#include "endianconv.h" + +#include +#include /* ----------------------------------------------------------------------------- - * RESTORE and MIGRATE commands + * DUMP, RESTORE and MIGRATE commands * -------------------------------------------------------------------------- */ +MDB_env *env; +MDB_dbi dbi; + +/* Generates a DUMP-format representation of the object 'o', adding it to the + * io stream pointed by 'rio'. This function can't fail. */ +void createDumpPayload(rio *payload, robj *o) { + unsigned char buf[2]; + uint64_t crc; + + /* Serialize the object in a RDB-like format. It consist of an object type + * byte followed by the serialized object. This is understood by RESTORE. */ + rioInitWithBuffer(payload,sdsempty()); + redisAssert(rdbSaveObjectType(payload,o)); + redisAssert(rdbSaveObject(payload,o)); + + /* Write the footer, this is how it looks like: + * ----------------+---------------------+---------------+ + * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 | + * ----------------+---------------------+---------------+ + * RDB version and CRC are both in little endian. + */ + + /* RDB version */ + buf[0] = REDIS_RDB_VERSION & 0xff; + buf[1] = (REDIS_RDB_VERSION >> 8) & 0xff; + payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2); + + /* CRC64 */ + crc = crc64(0,(unsigned char*)payload->io.buffer.ptr, + sdslen(payload->io.buffer.ptr)); + memrev64ifbe(&crc); + payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8); +} + +/* Verify that the RDB version of the dump payload matches the one of this Redis + * instance and that the checksum is ok. + * If the DUMP payload looks valid REDIS_OK is returned, otherwise REDIS_ERR + * is returned. */ +int verifyDumpPayload(unsigned char *p, size_t len) { + unsigned char *footer; + uint16_t rdbver; + uint64_t crc; + + /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */ + if (len < 10) return REDIS_ERR; + footer = p+(len-10); + + /* Verify RDB version */ + rdbver = (footer[1] << 8) | footer[0]; + if (rdbver != REDIS_RDB_VERSION) return REDIS_ERR; + + /* Verify CRC64 */ + crc = crc64(0,p,len-8); + memrev64ifbe(&crc); + return (memcmp(&crc,footer+2,8) == 0) ? REDIS_OK : REDIS_ERR; +} + +/* DUMP keyname + * DUMP is actually not used by Redis Cluster but it is the obvious + * complement of RESTORE and can be useful for different applications. */ +void dumpCommand(redisClient *c) { + robj *o, *dumpobj; + rio payload; + + /* Check if the key is here. */ + if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { + addReply(c,shared.nullbulk); + return; + } + + /* Create the DUMP encoded representation. */ + createDumpPayload(&payload,o); + + /* Transfer to the client */ + dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr); + addReplyBulk(c,dumpobj); + decrRefCount(dumpobj); + return; +} + /* RESTORE key ttl serialized-value */ void restoreCommand(redisClient *c) { long ttl; @@ -25,6 +109,12 @@ void restoreCommand(redisClient *c) { return; } + /* Verify RDB version and data checksum. */ + if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR) { + addReplyError(c,"DUMP payload version or checksum are wrong"); + return; + } + rioInitWithBuffer(&payload,c->argv[3]->ptr); if (((type = rdbLoadObjectType(&payload)) == -1) || ((obj = rdbLoadObject(type,&payload)) == NULL)) @@ -35,7 +125,7 @@ void restoreCommand(redisClient *c) { /* Create the key and set the TTL if any */ dbAdd(c->db,c->argv[1],obj); - if (ttl) setExpire(c->db,c->argv[1],time(NULL)+ttl); + if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl); signalModifiedKey(c->db,c->argv[1]); addReply(c,shared.ok); server.dirty++; @@ -46,7 +136,7 @@ void migrateCommand(redisClient *c) { int fd; long timeout; long dbid; - time_t ttl; + long long ttl = 0, expireat; robj *o; rio cmd, payload; @@ -55,7 +145,7 @@ void migrateCommand(redisClient *c) { return; if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK) return; - if (timeout <= 0) timeout = 1; + if (timeout <= 0) timeout = 1000; /* Check if the key is here. If not we reply with success as there is * nothing to migrate (for instance the key expired in the meantime), but @@ -73,29 +163,34 @@ void migrateCommand(redisClient *c) { server.neterr); return; } - if ((aeWait(fd,AE_WRITABLE,timeout*1000) & AE_WRITABLE) == 0) { - addReplyError(c,"Timeout connecting to the client"); + if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) { + close(fd); + addReplySds(c,sdsnew("-IOERR error or timeout connecting to the client\r\n")); return; } + /* Create RESTORE payload and generate the protocol to call the command. */ rioInitWithBuffer(&cmd,sdsempty()); redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2)); redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6)); redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid)); - ttl = getExpire(c->db,c->argv[3]); + expireat = getExpire(c->db,c->argv[3]); + if (expireat != -1) { + ttl = expireat-mstime(); + if (ttl < 1) ttl = 1; + } redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',4)); redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7)); redisAssertWithInfo(c,NULL,c->argv[3]->encoding == REDIS_ENCODING_RAW); redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr))); - redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,(ttl == -1) ? 0 : ttl)); + redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl)); /* Finally the last argument that is the serailized object payload - * in the form: . */ - rioInitWithBuffer(&payload,sdsempty()); - redisAssertWithInfo(c,NULL,rdbSaveObjectType(&payload,o)); - redisAssertWithInfo(c,NULL,rdbSaveObject(&payload,o) != -1); - redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr,sdslen(payload.io.buffer.ptr))); + * in the DUMP format. */ + createDumpPayload(&payload,o); + redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr, + sdslen(payload.io.buffer.ptr))); sdsfree(payload.io.buffer.ptr); /* Tranfer the query to the other node in 64K chunks. */ @@ -106,7 +201,7 @@ void migrateCommand(redisClient *c) { while ((towrite = sdslen(buf)-pos) > 0) { towrite = (towrite > (64*1024) ? (64*1024) : towrite); - nwritten = syncWrite(fd,buf+nwritten,towrite,timeout); + nwritten = syncWrite(fd,buf+pos,towrite,timeout); if (nwritten != (signed)towrite) goto socket_wr_err; pos += nwritten; } @@ -145,46 +240,185 @@ void migrateCommand(redisClient *c) { return; socket_wr_err: - redisLog(REDIS_NOTICE,"Can't write to target node for MIGRATE: %s", - strerror(errno)); - addReplyErrorFormat(c,"MIGRATE failed, writing to target node: %s.", - strerror(errno)); + addReplySds(c,sdsnew("-IOERR error or timeout writing to target instance\r\n")); sdsfree(cmd.io.buffer.ptr); close(fd); return; socket_rd_err: - redisLog(REDIS_NOTICE,"Can't read from target node for MIGRATE: %s", - strerror(errno)); - addReplyErrorFormat(c,"MIGRATE failed, reading from target node: %s.", - strerror(errno)); + addReplySds(c,sdsnew("-IOERR error or timeout reading from target node\r\n")); sdsfree(cmd.io.buffer.ptr); close(fd); return; } -/* DUMP keyname - * DUMP is actually not used by Redis Cluster but it is the obvious - * complement of RESTORE and can be useful for different applications. */ -void dumpCommand(redisClient *c) { - robj *o, *dumpobj; +void stopKeyArchive(void) { + redisAssert(env != NULL); + + mdb_dbi_close(env, dbi); + mdb_env_close(env); + env = NULL; + + server.mdb_state = REDIS_MDB_OFF; +} + +int startKeyArchive(void) { + redisAssert(env == NULL); + + int ret; + + ret = mdb_env_create(&env); + if (ret != 0) return ret; + + ret = mdb_env_set_mapsize(env, server.mdb_mapsize); + if (ret != 0) return ret; + + ret = mdb_env_set_maxdbs(env, 1); + if (ret != 0) return ret; + + mkdir(server.mdb_environment, 0755); + + ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0644); + if (ret != 0) return ret; + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + if (ret != 0) return ret; + + ret = mdb_dbi_open(txn, NULL, 0, &dbi); + if (ret != 0) return ret; + + mdb_txn_commit(txn); + + server.mdb_state = REDIS_MDB_ON; + return 0; +} + +int archive(redisDb *db, robj *key) { + if (server.mdb_state == REDIS_MDB_OFF) + return 1; + redisAssert(env != NULL); + + MDB_val kval; + kval.mv_data = key->ptr; + kval.mv_size = sdslen((sds)key->ptr); + + robj *object; + object = lookupKey(db, key); + if (object == NULL) + return 0; + + if (object->archived != 0) + return 1; + rio payload; + createDumpPayload(&payload, object); - /* Check if the key is here. */ - if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { - addReply(c,shared.nullbulk); - return; + MDB_val dval; + dval.mv_size = sdslen(payload.io.buffer.ptr); + dval.mv_data = payload.io.buffer.ptr; + + int ret; + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + if (ret != 0) + goto archive_err; + + ret = mdb_put(txn, dbi, &kval, &dval, 0); + if (ret != 0) { + mdb_txn_abort(txn); + goto archive_err; } - /* Serialize the object in a RDB-like format. It consist of an object type - * byte followed by the serialized object. This is understood by RESTORE. */ - rioInitWithBuffer(&payload,sdsempty()); - redisAssertWithInfo(c,NULL,rdbSaveObjectType(&payload,o)); - redisAssertWithInfo(c,NULL,rdbSaveObject(&payload,o)); + mdb_txn_commit(txn); + sdsfree(payload.io.buffer.ptr); + return 1; - /* Transfer to the client */ - dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr); - addReplyBulk(c,dumpobj); - decrRefCount(dumpobj); - return; +archive_err: + sdsfree(payload.io.buffer.ptr); + redisAssert(0); + return 0; +} + +robj *recover(redisDb *db, robj *key) { + if (server.mdb_state == REDIS_MDB_OFF) + return NULL; + + int ret; + + MDB_val kval; + kval.mv_data = key->ptr; + kval.mv_size = sdslen((sds)key->ptr); + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + if (ret != 0) + return NULL; + + MDB_cursor *cursor; + ret = mdb_cursor_open(txn, dbi, &cursor); + if (ret != 0) { + mdb_txn_abort(txn); + return NULL; + } + + MDB_val pval; + ret = mdb_cursor_get(cursor, &kval, &pval, MDB_SET); + if (ret != 0) { + mdb_txn_abort(txn); + return NULL; + } + + sds sval = sdsnewlen(pval.mv_data, pval.mv_size); + mdb_cursor_close(cursor); + mdb_txn_abort(txn); + + rio payload; + rioInitWithBuffer(&payload, sval); + + int type = rdbLoadObjectType(&payload); + if (type == -1) + goto recover_err; + + robj *object = rdbLoadObject(type, &payload); + if (object == NULL) + goto recover_err; + + object->archived = 1; + + dbAdd(db, key, object); + signalModifiedKey(db, key); + server.dirty++; + + sdsfree(sval); + return object; + +recover_err: + sdsfree(sval); + return NULL; +} + +void purge(robj *key) { + if (server.mdb_state == REDIS_MDB_OFF) + return; + + int ret; + + MDB_val kval; + kval.mv_data = key->ptr; + kval.mv_size = sdslen((sds)key->ptr); + + MDB_txn *txn; + ret = mdb_txn_begin(env, NULL, 0, &txn); + if (ret != 0) + return; + + ret = mdb_del(txn, dbi, &kval, NULL); + if (ret != 0) { + mdb_txn_abort(txn); + return; + } + + mdb_txn_commit(txn); }