]>
Commit | Line | Data |
---|---|---|
1 | #include "redis.h" | |
2 | #include "endianconv.h" | |
3 | ||
4 | #include <sys/stat.h> | |
5 | #include <lmdb.h> | |
6 | ||
/* -----------------------------------------------------------------------------
 * DUMP, RESTORE and MIGRATE commands
 * -------------------------------------------------------------------------- */
10 | ||
/* LMDB environment used by the key archive. startKeyArchive() asserts it is
 * NULL before creating it, and stopKeyArchive() resets it to NULL after
 * closing it. */
MDB_env *env;
/* Handle of the LMDB database opened by startKeyArchive(); it is the
 * database that archive(), recover(), purge() and rummage() operate on. */
MDB_dbi dbi;
13 | ||
14 | /* Generates a DUMP-format representation of the object 'o', adding it to the | |
15 | * io stream pointed by 'rio'. This function can't fail. */ | |
16 | void createDumpPayload(rio *payload, robj *o) { | |
17 | unsigned char buf[2]; | |
18 | uint64_t crc; | |
19 | ||
20 | /* Serialize the object in a RDB-like format. It consist of an object type | |
21 | * byte followed by the serialized object. This is understood by RESTORE. */ | |
22 | rioInitWithBuffer(payload,sdsempty()); | |
23 | redisAssert(rdbSaveObjectType(payload,o)); | |
24 | redisAssert(rdbSaveObject(payload,o)); | |
25 | ||
26 | /* Write the footer, this is how it looks like: | |
27 | * ----------------+---------------------+---------------+ | |
28 | * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 | | |
29 | * ----------------+---------------------+---------------+ | |
30 | * RDB version and CRC are both in little endian. | |
31 | */ | |
32 | ||
33 | /* RDB version */ | |
34 | buf[0] = REDIS_RDB_VERSION & 0xff; | |
35 | buf[1] = (REDIS_RDB_VERSION >> 8) & 0xff; | |
36 | payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2); | |
37 | ||
38 | /* CRC64 */ | |
39 | crc = crc64(0,(unsigned char*)payload->io.buffer.ptr, | |
40 | sdslen(payload->io.buffer.ptr)); | |
41 | memrev64ifbe(&crc); | |
42 | payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8); | |
43 | } | |
44 | ||
45 | /* Verify that the RDB version of the dump payload matches the one of this Redis | |
46 | * instance and that the checksum is ok. | |
47 | * If the DUMP payload looks valid REDIS_OK is returned, otherwise REDIS_ERR | |
48 | * is returned. */ | |
49 | int verifyDumpPayload(unsigned char *p, size_t len) { | |
50 | unsigned char *footer; | |
51 | uint16_t rdbver; | |
52 | uint64_t crc; | |
53 | ||
54 | /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */ | |
55 | if (len < 10) return REDIS_ERR; | |
56 | footer = p+(len-10); | |
57 | ||
58 | /* Verify RDB version */ | |
59 | rdbver = (footer[1] << 8) | footer[0]; | |
60 | if (rdbver != REDIS_RDB_VERSION) return REDIS_ERR; | |
61 | ||
62 | /* Verify CRC64 */ | |
63 | crc = crc64(0,p,len-8); | |
64 | memrev64ifbe(&crc); | |
65 | return (memcmp(&crc,footer+2,8) == 0) ? REDIS_OK : REDIS_ERR; | |
66 | } | |
67 | ||
68 | /* DUMP keyname | |
69 | * DUMP is actually not used by Redis Cluster but it is the obvious | |
70 | * complement of RESTORE and can be useful for different applications. */ | |
71 | void dumpCommand(redisClient *c) { | |
72 | robj *o, *dumpobj; | |
73 | rio payload; | |
74 | ||
75 | /* Check if the key is here. */ | |
76 | if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { | |
77 | addReply(c,shared.nullbulk); | |
78 | return; | |
79 | } | |
80 | ||
81 | /* Create the DUMP encoded representation. */ | |
82 | createDumpPayload(&payload,o); | |
83 | ||
84 | /* Transfer to the client */ | |
85 | dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr); | |
86 | addReplyBulk(c,dumpobj); | |
87 | decrRefCount(dumpobj); | |
88 | return; | |
89 | } | |
90 | ||
91 | /* RESTORE key ttl serialized-value */ | |
92 | void restoreCommand(redisClient *c) { | |
93 | long ttl; | |
94 | rio payload; | |
95 | int type; | |
96 | robj *obj; | |
97 | ||
98 | /* Make sure this key does not already exist here... */ | |
99 | if (lookupKeyWrite(c->db,c->argv[1]) != NULL) { | |
100 | addReplyError(c,"Target key name is busy."); | |
101 | return; | |
102 | } | |
103 | ||
104 | /* Check if the TTL value makes sense */ | |
105 | if (getLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != REDIS_OK) { | |
106 | return; | |
107 | } else if (ttl < 0) { | |
108 | addReplyError(c,"Invalid TTL value, must be >= 0"); | |
109 | return; | |
110 | } | |
111 | ||
112 | /* Verify RDB version and data checksum. */ | |
113 | if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR) { | |
114 | addReplyError(c,"DUMP payload version or checksum are wrong"); | |
115 | return; | |
116 | } | |
117 | ||
118 | rioInitWithBuffer(&payload,c->argv[3]->ptr); | |
119 | if (((type = rdbLoadObjectType(&payload)) == -1) || | |
120 | ((obj = rdbLoadObject(type,&payload)) == NULL)) | |
121 | { | |
122 | addReplyError(c,"Bad data format"); | |
123 | return; | |
124 | } | |
125 | ||
126 | /* Create the key and set the TTL if any */ | |
127 | dbAdd(c->db,c->argv[1],obj); | |
128 | if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl); | |
129 | signalModifiedKey(c->db,c->argv[1]); | |
130 | addReply(c,shared.ok); | |
131 | server.dirty++; | |
132 | } | |
133 | ||
134 | /* MIGRATE host port key dbid timeout */ | |
135 | void migrateCommand(redisClient *c) { | |
136 | int fd; | |
137 | long timeout; | |
138 | long dbid; | |
139 | long long ttl = 0, expireat; | |
140 | robj *o; | |
141 | rio cmd, payload; | |
142 | ||
143 | /* Sanity check */ | |
144 | if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK) | |
145 | return; | |
146 | if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK) | |
147 | return; | |
148 | if (timeout <= 0) timeout = 1000; | |
149 | ||
150 | /* Check if the key is here. If not we reply with success as there is | |
151 | * nothing to migrate (for instance the key expired in the meantime), but | |
152 | * we include such information in the reply string. */ | |
153 | if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) { | |
154 | addReplySds(c,sdsnew("+NOKEY\r\n")); | |
155 | return; | |
156 | } | |
157 | ||
158 | /* Connect */ | |
159 | fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr, | |
160 | atoi(c->argv[2]->ptr)); | |
161 | if (fd == -1) { | |
162 | addReplyErrorFormat(c,"Can't connect to target node: %s", | |
163 | server.neterr); | |
164 | return; | |
165 | } | |
166 | if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) { | |
167 | close(fd); | |
168 | addReplySds(c,sdsnew("-IOERR error or timeout connecting to the client\r\n")); | |
169 | return; | |
170 | } | |
171 | ||
172 | /* Create RESTORE payload and generate the protocol to call the command. */ | |
173 | rioInitWithBuffer(&cmd,sdsempty()); | |
174 | redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2)); | |
175 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6)); | |
176 | redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid)); | |
177 | ||
178 | expireat = getExpire(c->db,c->argv[3]); | |
179 | if (expireat != -1) { | |
180 | ttl = expireat-mstime(); | |
181 | if (ttl < 1) ttl = 1; | |
182 | } | |
183 | redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',4)); | |
184 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7)); | |
185 | redisAssertWithInfo(c,NULL,c->argv[3]->encoding == REDIS_ENCODING_RAW); | |
186 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr))); | |
187 | redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl)); | |
188 | ||
189 | /* Finally the last argument that is the serailized object payload | |
190 | * in the DUMP format. */ | |
191 | createDumpPayload(&payload,o); | |
192 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr, | |
193 | sdslen(payload.io.buffer.ptr))); | |
194 | sdsfree(payload.io.buffer.ptr); | |
195 | ||
196 | /* Tranfer the query to the other node in 64K chunks. */ | |
197 | { | |
198 | sds buf = cmd.io.buffer.ptr; | |
199 | size_t pos = 0, towrite; | |
200 | int nwritten = 0; | |
201 | ||
202 | while ((towrite = sdslen(buf)-pos) > 0) { | |
203 | towrite = (towrite > (64*1024) ? (64*1024) : towrite); | |
204 | nwritten = syncWrite(fd,buf+pos,towrite,timeout); | |
205 | if (nwritten != (signed)towrite) goto socket_wr_err; | |
206 | pos += nwritten; | |
207 | } | |
208 | } | |
209 | ||
210 | /* Read back the reply. */ | |
211 | { | |
212 | char buf1[1024]; | |
213 | char buf2[1024]; | |
214 | ||
215 | /* Read the two replies */ | |
216 | if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0) | |
217 | goto socket_rd_err; | |
218 | if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0) | |
219 | goto socket_rd_err; | |
220 | if (buf1[0] == '-' || buf2[0] == '-') { | |
221 | addReplyErrorFormat(c,"Target instance replied with error: %s", | |
222 | (buf1[0] == '-') ? buf1+1 : buf2+1); | |
223 | } else { | |
224 | robj *aux; | |
225 | ||
226 | dbDelete(c->db,c->argv[3]); | |
227 | signalModifiedKey(c->db,c->argv[3]); | |
228 | addReply(c,shared.ok); | |
229 | server.dirty++; | |
230 | ||
231 | /* Translate MIGRATE as DEL for replication/AOF. */ | |
232 | aux = createStringObject("DEL",3); | |
233 | rewriteClientCommandVector(c,2,aux,c->argv[3]); | |
234 | decrRefCount(aux); | |
235 | } | |
236 | } | |
237 | ||
238 | sdsfree(cmd.io.buffer.ptr); | |
239 | close(fd); | |
240 | return; | |
241 | ||
242 | socket_wr_err: | |
243 | addReplySds(c,sdsnew("-IOERR error or timeout writing to target instance\r\n")); | |
244 | sdsfree(cmd.io.buffer.ptr); | |
245 | close(fd); | |
246 | return; | |
247 | ||
248 | socket_rd_err: | |
249 | addReplySds(c,sdsnew("-IOERR error or timeout reading from target node\r\n")); | |
250 | sdsfree(cmd.io.buffer.ptr); | |
251 | close(fd); | |
252 | return; | |
253 | } | |
254 | ||
255 | void stopKeyArchive(void) { | |
256 | redisAssert(env != NULL); | |
257 | ||
258 | MDB_txn *txn; | |
259 | int ret = mdb_txn_begin(env, NULL, 0, &txn); | |
260 | if (ret != 0) | |
261 | mdb_txn_abort(txn); | |
262 | else { | |
263 | mdb_dbi_close(env, dbi); | |
264 | mdb_txn_commit(txn); | |
265 | } | |
266 | ||
267 | mdb_env_close(env); | |
268 | env = NULL; | |
269 | ||
270 | server.mdb_state = REDIS_MDB_OFF; | |
271 | } | |
272 | ||
273 | int startKeyArchive(void) { | |
274 | redisAssert(env == NULL); | |
275 | ||
276 | int ret; | |
277 | ||
278 | ret = mdb_env_create(&env); | |
279 | if (ret != 0) return ret; | |
280 | ||
281 | ret = mdb_env_set_mapsize(env, server.mdb_mapsize); | |
282 | if (ret != 0) return ret; | |
283 | ||
284 | ret = mdb_env_set_maxdbs(env, 1); | |
285 | if (ret != 0) return ret; | |
286 | ||
287 | mkdir(server.mdb_environment, 0755); | |
288 | ||
289 | ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0644); | |
290 | if (ret != 0) return ret; | |
291 | ||
292 | MDB_txn *txn; | |
293 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
294 | if (ret != 0) return ret; | |
295 | ||
296 | ret = mdb_dbi_open(txn, NULL, 0, &dbi); | |
297 | if (ret != 0) return ret; | |
298 | ||
299 | mdb_txn_commit(txn); | |
300 | ||
301 | server.mdb_state = REDIS_MDB_ON; | |
302 | return 0; | |
303 | } | |
304 | ||
305 | int archive(redisDb *db, robj *key) { | |
306 | if (server.mdb_state == REDIS_MDB_OFF) | |
307 | return 1; | |
308 | redisAssert(env != NULL); | |
309 | ||
310 | MDB_val kval; | |
311 | kval.mv_data = key->ptr; | |
312 | kval.mv_size = sdslen((sds)key->ptr); | |
313 | ||
314 | robj *object; | |
315 | object = lookupKey(db, key); | |
316 | if (object == NULL) | |
317 | return 0; | |
318 | ||
319 | if (object->archived != 0) | |
320 | return 1; | |
321 | ||
322 | rio payload; | |
323 | createDumpPayload(&payload, object); | |
324 | ||
325 | MDB_val dval; | |
326 | dval.mv_size = sdslen(payload.io.buffer.ptr); | |
327 | dval.mv_data = payload.io.buffer.ptr; | |
328 | ||
329 | int ret; | |
330 | ||
331 | MDB_txn *txn; | |
332 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
333 | if (ret != 0) | |
334 | goto archive_err; | |
335 | ||
336 | ret = mdb_put(txn, dbi, &kval, &dval, 0); | |
337 | if (ret != 0) { | |
338 | mdb_txn_abort(txn); | |
339 | goto archive_err; | |
340 | } | |
341 | ||
342 | mdb_txn_commit(txn); | |
343 | sdsfree(payload.io.buffer.ptr); | |
344 | return 1; | |
345 | ||
346 | archive_err: | |
347 | sdsfree(payload.io.buffer.ptr); | |
348 | redisAssert(0); | |
349 | return 0; | |
350 | } | |
351 | ||
352 | robj *recover(redisDb *db, robj *key) { | |
353 | if (server.mdb_state == REDIS_MDB_OFF) | |
354 | return NULL; | |
355 | ||
356 | int ret; | |
357 | ||
358 | MDB_val kval; | |
359 | kval.mv_data = key->ptr; | |
360 | kval.mv_size = sdslen((sds)key->ptr); | |
361 | ||
362 | MDB_txn *txn; | |
363 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
364 | if (ret != 0) | |
365 | return NULL; | |
366 | ||
367 | MDB_cursor *cursor; | |
368 | ret = mdb_cursor_open(txn, dbi, &cursor); | |
369 | if (ret != 0) { | |
370 | mdb_txn_abort(txn); | |
371 | return NULL; | |
372 | } | |
373 | ||
374 | MDB_val pval; | |
375 | ret = mdb_cursor_get(cursor, &kval, &pval, MDB_SET); | |
376 | if (ret != 0) { | |
377 | mdb_txn_abort(txn); | |
378 | return NULL; | |
379 | } | |
380 | ||
381 | sds sval = sdsnewlen(pval.mv_data, pval.mv_size); | |
382 | mdb_cursor_close(cursor); | |
383 | mdb_txn_abort(txn); | |
384 | ||
385 | rio payload; | |
386 | rioInitWithBuffer(&payload, sval); | |
387 | ||
388 | int type = rdbLoadObjectType(&payload); | |
389 | if (type == -1) | |
390 | goto recover_err; | |
391 | ||
392 | robj *object = rdbLoadObject(type, &payload); | |
393 | if (object == NULL) | |
394 | goto recover_err; | |
395 | ||
396 | object->archived = 1; | |
397 | ||
398 | dbAdd(db, key, object); | |
399 | signalModifiedKey(db, key); | |
400 | server.dirty++; | |
401 | ||
402 | sdsfree(sval); | |
403 | return object; | |
404 | ||
405 | recover_err: | |
406 | sdsfree(sval); | |
407 | return NULL; | |
408 | } | |
409 | ||
410 | void purge(robj *key) { | |
411 | if (server.mdb_state == REDIS_MDB_OFF) | |
412 | return; | |
413 | ||
414 | int ret; | |
415 | ||
416 | MDB_val kval; | |
417 | kval.mv_data = key->ptr; | |
418 | kval.mv_size = sdslen((sds)key->ptr); | |
419 | ||
420 | MDB_txn *txn; | |
421 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
422 | if (ret != 0) | |
423 | return; | |
424 | ||
425 | ret = mdb_del(txn, dbi, &kval, NULL); | |
426 | if (ret != 0) { | |
427 | mdb_txn_abort(txn); | |
428 | return; | |
429 | } | |
430 | ||
431 | mdb_txn_commit(txn); | |
432 | } | |
433 | ||
434 | int rummage(redisClient *c, unsigned long *numkeys) { | |
435 | if (server.mdb_state == REDIS_MDB_OFF) | |
436 | return REDIS_OK; | |
437 | ||
438 | int ret; | |
439 | ||
440 | MDB_txn *txn; | |
441 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
442 | if (ret != 0) | |
443 | return REDIS_ERR; | |
444 | ||
445 | MDB_cursor *cursor; | |
446 | ret = mdb_cursor_open(txn, dbi, &cursor); | |
447 | if (ret != 0) { | |
448 | mdb_txn_abort(txn); | |
449 | return REDIS_ERR; | |
450 | } | |
451 | ||
452 | MDB_val kval; | |
453 | while ((ret = mdb_cursor_get(cursor, &kval, NULL, MDB_NEXT)) == 0) { | |
454 | robj *key = createStringObject(kval.mv_data, kval.mv_size); | |
455 | addReplyBulk(c, key); | |
456 | ++*numkeys; | |
457 | decrRefCount(key); | |
458 | } | |
459 | ||
460 | mdb_cursor_close(cursor); | |
461 | mdb_txn_abort(txn); | |
462 | return REDIS_OK; | |
463 | } |