]> git.saurik.com Git - redis.git/blob - src/migrate.c
a998c5f08841e65703949bdbd626b66e4b2cab20
[redis.git] / src / migrate.c
1 #include "redis.h"
2 #include "endianconv.h"
3
4 #include <sys/stat.h>
5 #include <lmdb.h>
6
7 /* -----------------------------------------------------------------------------
8 * DUMP, RESTORE and MIGRATE commands
9 * -------------------------------------------------------------------------- */
10
11 MDB_env *env;
12 MDB_dbi dbi;
13
14 /* Generates a DUMP-format representation of the object 'o', adding it to the
15 * io stream pointed by 'rio'. This function can't fail. */
16 void createDumpPayload(rio *payload, robj *o) {
17 unsigned char buf[2];
18 uint64_t crc;
19
20 /* Serialize the object in a RDB-like format. It consist of an object type
21 * byte followed by the serialized object. This is understood by RESTORE. */
22 rioInitWithBuffer(payload,sdsempty());
23 redisAssert(rdbSaveObjectType(payload,o));
24 redisAssert(rdbSaveObject(payload,o));
25
26 /* Write the footer, this is how it looks like:
27 * ----------------+---------------------+---------------+
28 * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 |
29 * ----------------+---------------------+---------------+
30 * RDB version and CRC are both in little endian.
31 */
32
33 /* RDB version */
34 buf[0] = REDIS_RDB_VERSION & 0xff;
35 buf[1] = (REDIS_RDB_VERSION >> 8) & 0xff;
36 payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2);
37
38 /* CRC64 */
39 crc = crc64(0,(unsigned char*)payload->io.buffer.ptr,
40 sdslen(payload->io.buffer.ptr));
41 memrev64ifbe(&crc);
42 payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8);
43 }
44
45 /* Verify that the RDB version of the dump payload matches the one of this Redis
46 * instance and that the checksum is ok.
47 * If the DUMP payload looks valid REDIS_OK is returned, otherwise REDIS_ERR
48 * is returned. */
49 int verifyDumpPayload(unsigned char *p, size_t len) {
50 unsigned char *footer;
51 uint16_t rdbver;
52 uint64_t crc;
53
54 /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */
55 if (len < 10) return REDIS_ERR;
56 footer = p+(len-10);
57
58 /* Verify RDB version */
59 rdbver = (footer[1] << 8) | footer[0];
60 if (rdbver != REDIS_RDB_VERSION) return REDIS_ERR;
61
62 /* Verify CRC64 */
63 crc = crc64(0,p,len-8);
64 memrev64ifbe(&crc);
65 return (memcmp(&crc,footer+2,8) == 0) ? REDIS_OK : REDIS_ERR;
66 }
67
68 /* DUMP keyname
69 * DUMP is actually not used by Redis Cluster but it is the obvious
70 * complement of RESTORE and can be useful for different applications. */
71 void dumpCommand(redisClient *c) {
72 robj *o, *dumpobj;
73 rio payload;
74
75 /* Check if the key is here. */
76 if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) {
77 addReply(c,shared.nullbulk);
78 return;
79 }
80
81 /* Create the DUMP encoded representation. */
82 createDumpPayload(&payload,o);
83
84 /* Transfer to the client */
85 dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr);
86 addReplyBulk(c,dumpobj);
87 decrRefCount(dumpobj);
88 return;
89 }
90
91 /* RESTORE key ttl serialized-value */
92 void restoreCommand(redisClient *c) {
93 long ttl;
94 rio payload;
95 int type;
96 robj *obj;
97
98 /* Make sure this key does not already exist here... */
99 if (lookupKeyWrite(c->db,c->argv[1]) != NULL) {
100 addReplyError(c,"Target key name is busy.");
101 return;
102 }
103
104 /* Check if the TTL value makes sense */
105 if (getLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != REDIS_OK) {
106 return;
107 } else if (ttl < 0) {
108 addReplyError(c,"Invalid TTL value, must be >= 0");
109 return;
110 }
111
112 /* Verify RDB version and data checksum. */
113 if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR) {
114 addReplyError(c,"DUMP payload version or checksum are wrong");
115 return;
116 }
117
118 rioInitWithBuffer(&payload,c->argv[3]->ptr);
119 if (((type = rdbLoadObjectType(&payload)) == -1) ||
120 ((obj = rdbLoadObject(type,&payload)) == NULL))
121 {
122 addReplyError(c,"Bad data format");
123 return;
124 }
125
126 /* Create the key and set the TTL if any */
127 dbAdd(c->db,c->argv[1],obj);
128 if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl);
129 signalModifiedKey(c->db,c->argv[1]);
130 addReply(c,shared.ok);
131 server.dirty++;
132 }
133
134 /* MIGRATE host port key dbid timeout */
135 void migrateCommand(redisClient *c) {
136 int fd;
137 long timeout;
138 long dbid;
139 long long ttl = 0, expireat;
140 robj *o;
141 rio cmd, payload;
142
143 /* Sanity check */
144 if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK)
145 return;
146 if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK)
147 return;
148 if (timeout <= 0) timeout = 1000;
149
150 /* Check if the key is here. If not we reply with success as there is
151 * nothing to migrate (for instance the key expired in the meantime), but
152 * we include such information in the reply string. */
153 if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) {
154 addReplySds(c,sdsnew("+NOKEY\r\n"));
155 return;
156 }
157
158 /* Connect */
159 fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr,
160 atoi(c->argv[2]->ptr));
161 if (fd == -1) {
162 addReplyErrorFormat(c,"Can't connect to target node: %s",
163 server.neterr);
164 return;
165 }
166 if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) {
167 close(fd);
168 addReplySds(c,sdsnew("-IOERR error or timeout connecting to the client\r\n"));
169 return;
170 }
171
172 /* Create RESTORE payload and generate the protocol to call the command. */
173 rioInitWithBuffer(&cmd,sdsempty());
174 redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
175 redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
176 redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
177
178 expireat = getExpire(c->db,c->argv[3]);
179 if (expireat != -1) {
180 ttl = expireat-mstime();
181 if (ttl < 1) ttl = 1;
182 }
183 redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',4));
184 redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
185 redisAssertWithInfo(c,NULL,c->argv[3]->encoding == REDIS_ENCODING_RAW);
186 redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr)));
187 redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));
188
189 /* Finally the last argument that is the serailized object payload
190 * in the DUMP format. */
191 createDumpPayload(&payload,o);
192 redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr,
193 sdslen(payload.io.buffer.ptr)));
194 sdsfree(payload.io.buffer.ptr);
195
196 /* Tranfer the query to the other node in 64K chunks. */
197 {
198 sds buf = cmd.io.buffer.ptr;
199 size_t pos = 0, towrite;
200 int nwritten = 0;
201
202 while ((towrite = sdslen(buf)-pos) > 0) {
203 towrite = (towrite > (64*1024) ? (64*1024) : towrite);
204 nwritten = syncWrite(fd,buf+pos,towrite,timeout);
205 if (nwritten != (signed)towrite) goto socket_wr_err;
206 pos += nwritten;
207 }
208 }
209
210 /* Read back the reply. */
211 {
212 char buf1[1024];
213 char buf2[1024];
214
215 /* Read the two replies */
216 if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0)
217 goto socket_rd_err;
218 if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0)
219 goto socket_rd_err;
220 if (buf1[0] == '-' || buf2[0] == '-') {
221 addReplyErrorFormat(c,"Target instance replied with error: %s",
222 (buf1[0] == '-') ? buf1+1 : buf2+1);
223 } else {
224 robj *aux;
225
226 dbDelete(c->db,c->argv[3]);
227 signalModifiedKey(c->db,c->argv[3]);
228 addReply(c,shared.ok);
229 server.dirty++;
230
231 /* Translate MIGRATE as DEL for replication/AOF. */
232 aux = createStringObject("DEL",3);
233 rewriteClientCommandVector(c,2,aux,c->argv[3]);
234 decrRefCount(aux);
235 }
236 }
237
238 sdsfree(cmd.io.buffer.ptr);
239 close(fd);
240 return;
241
242 socket_wr_err:
243 addReplySds(c,sdsnew("-IOERR error or timeout writing to target instance\r\n"));
244 sdsfree(cmd.io.buffer.ptr);
245 close(fd);
246 return;
247
248 socket_rd_err:
249 addReplySds(c,sdsnew("-IOERR error or timeout reading from target node\r\n"));
250 sdsfree(cmd.io.buffer.ptr);
251 close(fd);
252 return;
253 }
254
255 void stopKeyArchive(void) {
256 redisAssert(env != NULL);
257
258 MDB_txn *txn;
259 int ret = mdb_txn_begin(env, NULL, 0, &txn);
260 if (ret != 0)
261 mdb_txn_abort(txn);
262 else {
263 mdb_dbi_close(env, dbi);
264 mdb_txn_commit(txn);
265 }
266
267 mdb_env_close(env);
268 env = NULL;
269
270 server.mdb_state = REDIS_MDB_OFF;
271 }
272
273 int startKeyArchive(void) {
274 redisAssert(env == NULL);
275
276 int ret;
277
278 ret = mdb_env_create(&env);
279 if (ret != 0) return ret;
280
281 ret = mdb_env_set_mapsize(env, server.mdb_mapsize);
282 if (ret != 0) return ret;
283
284 ret = mdb_env_set_maxdbs(env, 1);
285 if (ret != 0) return ret;
286
287 mkdir(server.mdb_environment, 0755);
288
289 ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0644);
290 if (ret != 0) return ret;
291
292 MDB_txn *txn;
293 ret = mdb_txn_begin(env, NULL, 0, &txn);
294 if (ret != 0) return ret;
295
296 ret = mdb_dbi_open(txn, NULL, 0, &dbi);
297 if (ret != 0) return ret;
298
299 mdb_txn_commit(txn);
300
301 server.mdb_state = REDIS_MDB_ON;
302 return 0;
303 }
304
305 int archive(redisDb *db, robj *key) {
306 if (server.mdb_state == REDIS_MDB_OFF)
307 return 1;
308 redisAssert(env != NULL);
309
310 MDB_val kval;
311 kval.mv_data = key->ptr;
312 kval.mv_size = sdslen((sds)key->ptr);
313
314 robj *object;
315 object = lookupKey(db, key);
316 if (object == NULL)
317 return 0;
318
319 if (object->archived != 0)
320 return 1;
321
322 rio payload;
323 createDumpPayload(&payload, object);
324
325 MDB_val dval;
326 dval.mv_size = sdslen(payload.io.buffer.ptr);
327 dval.mv_data = payload.io.buffer.ptr;
328
329 int ret;
330
331 MDB_txn *txn;
332 ret = mdb_txn_begin(env, NULL, 0, &txn);
333 if (ret != 0)
334 goto archive_err;
335
336 ret = mdb_put(txn, dbi, &kval, &dval, 0);
337 if (ret != 0) {
338 mdb_txn_abort(txn);
339 goto archive_err;
340 }
341
342 mdb_txn_commit(txn);
343 sdsfree(payload.io.buffer.ptr);
344 return 1;
345
346 archive_err:
347 sdsfree(payload.io.buffer.ptr);
348 redisAssert(0);
349 return 0;
350 }
351
352 robj *recover(redisDb *db, robj *key) {
353 if (server.mdb_state == REDIS_MDB_OFF)
354 return NULL;
355
356 int ret;
357
358 MDB_val kval;
359 kval.mv_data = key->ptr;
360 kval.mv_size = sdslen((sds)key->ptr);
361
362 MDB_txn *txn;
363 ret = mdb_txn_begin(env, NULL, 0, &txn);
364 if (ret != 0)
365 return NULL;
366
367 MDB_cursor *cursor;
368 ret = mdb_cursor_open(txn, dbi, &cursor);
369 if (ret != 0) {
370 mdb_txn_abort(txn);
371 return NULL;
372 }
373
374 MDB_val pval;
375 ret = mdb_cursor_get(cursor, &kval, &pval, MDB_SET);
376 if (ret != 0) {
377 mdb_txn_abort(txn);
378 return NULL;
379 }
380
381 sds sval = sdsnewlen(pval.mv_data, pval.mv_size);
382 mdb_cursor_close(cursor);
383 mdb_txn_abort(txn);
384
385 rio payload;
386 rioInitWithBuffer(&payload, sval);
387
388 int type = rdbLoadObjectType(&payload);
389 if (type == -1)
390 goto recover_err;
391
392 robj *object = rdbLoadObject(type, &payload);
393 if (object == NULL)
394 goto recover_err;
395
396 object->archived = 1;
397
398 dbAdd(db, key, object);
399 signalModifiedKey(db, key);
400 server.dirty++;
401
402 sdsfree(sval);
403 return object;
404
405 recover_err:
406 sdsfree(sval);
407 return NULL;
408 }
409
410 void purge(robj *key) {
411 if (server.mdb_state == REDIS_MDB_OFF)
412 return;
413
414 int ret;
415
416 MDB_val kval;
417 kval.mv_data = key->ptr;
418 kval.mv_size = sdslen((sds)key->ptr);
419
420 MDB_txn *txn;
421 ret = mdb_txn_begin(env, NULL, 0, &txn);
422 if (ret != 0)
423 return;
424
425 ret = mdb_del(txn, dbi, &kval, NULL);
426 if (ret != 0) {
427 mdb_txn_abort(txn);
428 return;
429 }
430
431 mdb_txn_commit(txn);
432 }
433
434 int rummage(redisClient *c, unsigned long *numkeys) {
435 if (server.mdb_state == REDIS_MDB_OFF)
436 return REDIS_OK;
437
438 int ret;
439
440 MDB_txn *txn;
441 ret = mdb_txn_begin(env, NULL, 0, &txn);
442 if (ret != 0)
443 return REDIS_ERR;
444
445 MDB_cursor *cursor;
446 ret = mdb_cursor_open(txn, dbi, &cursor);
447 if (ret != 0) {
448 mdb_txn_abort(txn);
449 return REDIS_ERR;
450 }
451
452 MDB_val kval;
453 while ((ret = mdb_cursor_get(cursor, &kval, NULL, MDB_NEXT)) == 0) {
454 robj *key = createStringObject(kval.mv_data, kval.mv_size);
455 addReplyBulk(c, key);
456 ++*numkeys;
457 decrRefCount(key);
458 }
459
460 mdb_cursor_close(cursor);
461 mdb_txn_abort(txn);
462 return REDIS_OK;
463 }