]>
Commit | Line | Data |
---|---|---|
571e257d | 1 | #include "redis.h" |
bff31e12 | 2 | #include "endianconv.h" |
571e257d | 3 | |
8e2a225a JF |
4 | #include <sys/stat.h> |
5 | #include <lmdb.h> | |
6 | ||
571e257d | 7 | /* ----------------------------------------------------------------------------- |
bff31e12 | 8 | * DUMP, RESTORE and MIGRATE commands |
571e257d | 9 | * -------------------------------------------------------------------------- */ |
10 | ||
8e2a225a JF |
11 | MDB_env *env; /* LMDB environment handle for the key archive: startKeyArchive() asserts it is NULL on entry and stopKeyArchive() resets it to NULL. */ |
12 | MDB_dbi dbi; /* LMDB database handle filled in by mdb_open() in startKeyArchive() and closed in stopKeyArchive(). */ | |
13 | ||
bff31e12 | 14 | /* Generates a DUMP-format representation of the object 'o', adding it to the |
15 | * io stream pointed by 'rio'. This function can't fail. */ | |
16 | void createDumpPayload(rio *payload, robj *o) { | |
17 | unsigned char buf[2]; | |
18 | uint64_t crc; | |
19 | ||
20 | /* Serialize the object in a RDB-like format. It consist of an object type | |
21 | * byte followed by the serialized object. This is understood by RESTORE. */ | |
22 | rioInitWithBuffer(payload,sdsempty()); | |
23 | redisAssert(rdbSaveObjectType(payload,o)); | |
24 | redisAssert(rdbSaveObject(payload,o)); | |
25 | ||
26 | /* Write the footer, this is how it looks like: | |
27 | * ----------------+---------------------+---------------+ | |
28 | * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 | | |
29 | * ----------------+---------------------+---------------+ | |
30 | * RDB version and CRC are both in little endian. | |
31 | */ | |
32 | ||
33 | /* RDB version */ | |
34 | buf[0] = REDIS_RDB_VERSION & 0xff; | |
35 | buf[1] = (REDIS_RDB_VERSION >> 8) & 0xff; | |
36 | payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2); | |
37 | ||
38 | /* CRC64 */ | |
e9574039 | 39 | crc = crc64(0,(unsigned char*)payload->io.buffer.ptr, |
bff31e12 | 40 | sdslen(payload->io.buffer.ptr)); |
41 | memrev64ifbe(&crc); | |
42 | payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8); | |
43 | } | |
44 | ||
45 | /* Verify that the RDB version of the dump payload matches the one of this Redis | |
46 | * instance and that the checksum is ok. | |
47 | * If the DUMP payload looks valid REDIS_OK is returned, otherwise REDIS_ERR | |
48 | * is returned. */ | |
49 | int verifyDumpPayload(unsigned char *p, size_t len) { | |
50 | unsigned char *footer; | |
51 | uint16_t rdbver; | |
52 | uint64_t crc; | |
53 | ||
54 | /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */ | |
55 | if (len < 10) return REDIS_ERR; | |
56 | footer = p+(len-10); | |
57 | ||
58 | /* Verify RDB version */ | |
59 | rdbver = (footer[1] << 8) | footer[0]; | |
60 | if (rdbver != REDIS_RDB_VERSION) return REDIS_ERR; | |
61 | ||
62 | /* Verify CRC64 */ | |
e9574039 | 63 | crc = crc64(0,p,len-8); |
bff31e12 | 64 | memrev64ifbe(&crc); |
65 | return (memcmp(&crc,footer+2,8) == 0) ? REDIS_OK : REDIS_ERR; | |
66 | } | |
67 | ||
68 | /* DUMP keyname | |
69 | * DUMP is actually not used by Redis Cluster but it is the obvious | |
70 | * complement of RESTORE and can be useful for different applications. */ | |
71 | void dumpCommand(redisClient *c) { | |
72 | robj *o, *dumpobj; | |
73 | rio payload; | |
74 | ||
75 | /* Check if the key is here. */ | |
76 | if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { | |
77 | addReply(c,shared.nullbulk); | |
78 | return; | |
79 | } | |
80 | ||
81 | /* Create the DUMP encoded representation. */ | |
82 | createDumpPayload(&payload,o); | |
83 | ||
84 | /* Transfer to the client */ | |
85 | dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr); | |
86 | addReplyBulk(c,dumpobj); | |
87 | decrRefCount(dumpobj); | |
88 | return; | |
89 | } | |
90 | ||
571e257d | 91 | /* RESTORE key ttl serialized-value */ |
92 | void restoreCommand(redisClient *c) { | |
93 | long ttl; | |
94 | rio payload; | |
95 | int type; | |
96 | robj *obj; | |
97 | ||
98 | /* Make sure this key does not already exist here... */ | |
99 | if (lookupKeyWrite(c->db,c->argv[1]) != NULL) { | |
100 | addReplyError(c,"Target key name is busy."); | |
101 | return; | |
102 | } | |
103 | ||
104 | /* Check if the TTL value makes sense */ | |
105 | if (getLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != REDIS_OK) { | |
106 | return; | |
107 | } else if (ttl < 0) { | |
108 | addReplyError(c,"Invalid TTL value, must be >= 0"); | |
109 | return; | |
110 | } | |
111 | ||
bff31e12 | 112 | /* Verify RDB version and data checksum. */ |
113 | if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR) { | |
114 | addReplyError(c,"DUMP payload version or checksum are wrong"); | |
115 | return; | |
116 | } | |
117 | ||
571e257d | 118 | rioInitWithBuffer(&payload,c->argv[3]->ptr); |
119 | if (((type = rdbLoadObjectType(&payload)) == -1) || | |
120 | ((obj = rdbLoadObject(type,&payload)) == NULL)) | |
121 | { | |
122 | addReplyError(c,"Bad data format"); | |
123 | return; | |
124 | } | |
125 | ||
126 | /* Create the key and set the TTL if any */ | |
127 | dbAdd(c->db,c->argv[1],obj); | |
bff31e12 | 128 | if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl); |
571e257d | 129 | signalModifiedKey(c->db,c->argv[1]); |
130 | addReply(c,shared.ok); | |
131 | server.dirty++; | |
132 | } | |
133 | ||
134 | /* MIGRATE host port key dbid timeout */ | |
135 | void migrateCommand(redisClient *c) { | |
136 | int fd; | |
137 | long timeout; | |
138 | long dbid; | |
3ba5eab7 | 139 | long long ttl = 0, expireat; |
571e257d | 140 | robj *o; |
141 | rio cmd, payload; | |
142 | ||
143 | /* Sanity check */ | |
144 | if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK) | |
145 | return; | |
146 | if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK) | |
147 | return; | |
d85a09df | 148 | if (timeout <= 0) timeout = 1000; |
571e257d | 149 | |
150 | /* Check if the key is here. If not we reply with success as there is | |
151 | * nothing to migrate (for instance the key expired in the meantime), but | |
152 | * we include such information in the reply string. */ | |
153 | if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) { | |
154 | addReplySds(c,sdsnew("+NOKEY\r\n")); | |
155 | return; | |
156 | } | |
157 | ||
158 | /* Connect */ | |
159 | fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr, | |
160 | atoi(c->argv[2]->ptr)); | |
161 | if (fd == -1) { | |
162 | addReplyErrorFormat(c,"Can't connect to target node: %s", | |
163 | server.neterr); | |
164 | return; | |
165 | } | |
26a48504 | 166 | if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) { |
ae3aeca8 | 167 | close(fd); |
37cc07dd | 168 | addReplySds(c,sdsnew("-IOERR error or timeout connecting to the client\r\n")); |
571e257d | 169 | return; |
170 | } | |
171 | ||
bff31e12 | 172 | /* Create RESTORE payload and generate the protocol to call the command. */ |
571e257d | 173 | rioInitWithBuffer(&cmd,sdsempty()); |
174 | redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2)); | |
175 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6)); | |
176 | redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid)); | |
177 | ||
fb8409a5 | 178 | expireat = getExpire(c->db,c->argv[3]); |
179 | if (expireat != -1) { | |
180 | ttl = expireat-mstime(); | |
181 | if (ttl < 1) ttl = 1; | |
182 | } | |
571e257d | 183 | redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',4)); |
184 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7)); | |
185 | redisAssertWithInfo(c,NULL,c->argv[3]->encoding == REDIS_ENCODING_RAW); | |
186 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr))); | |
3ba5eab7 | 187 | redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl)); |
571e257d | 188 | |
189 | /* Finally the last argument that is the serailized object payload | |
bff31e12 | 190 | * in the DUMP format. */ |
191 | createDumpPayload(&payload,o); | |
192 | redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr, | |
193 | sdslen(payload.io.buffer.ptr))); | |
571e257d | 194 | sdsfree(payload.io.buffer.ptr); |
195 | ||
196 | /* Tranfer the query to the other node in 64K chunks. */ | |
197 | { | |
198 | sds buf = cmd.io.buffer.ptr; | |
199 | size_t pos = 0, towrite; | |
200 | int nwritten = 0; | |
201 | ||
202 | while ((towrite = sdslen(buf)-pos) > 0) { | |
203 | towrite = (towrite > (64*1024) ? (64*1024) : towrite); | |
bde80cb2 | 204 | nwritten = syncWrite(fd,buf+pos,towrite,timeout); |
571e257d | 205 | if (nwritten != (signed)towrite) goto socket_wr_err; |
206 | pos += nwritten; | |
207 | } | |
208 | } | |
209 | ||
210 | /* Read back the reply. */ | |
211 | { | |
212 | char buf1[1024]; | |
213 | char buf2[1024]; | |
214 | ||
215 | /* Read the two replies */ | |
216 | if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0) | |
217 | goto socket_rd_err; | |
218 | if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0) | |
219 | goto socket_rd_err; | |
220 | if (buf1[0] == '-' || buf2[0] == '-') { | |
221 | addReplyErrorFormat(c,"Target instance replied with error: %s", | |
222 | (buf1[0] == '-') ? buf1+1 : buf2+1); | |
223 | } else { | |
224 | robj *aux; | |
225 | ||
226 | dbDelete(c->db,c->argv[3]); | |
227 | signalModifiedKey(c->db,c->argv[3]); | |
228 | addReply(c,shared.ok); | |
229 | server.dirty++; | |
230 | ||
231 | /* Translate MIGRATE as DEL for replication/AOF. */ | |
232 | aux = createStringObject("DEL",3); | |
233 | rewriteClientCommandVector(c,2,aux,c->argv[3]); | |
234 | decrRefCount(aux); | |
235 | } | |
236 | } | |
237 | ||
238 | sdsfree(cmd.io.buffer.ptr); | |
239 | close(fd); | |
240 | return; | |
241 | ||
242 | socket_wr_err: | |
37cc07dd | 243 | addReplySds(c,sdsnew("-IOERR error or timeout writing to target instance\r\n")); |
571e257d | 244 | sdsfree(cmd.io.buffer.ptr); |
245 | close(fd); | |
246 | return; | |
247 | ||
248 | socket_rd_err: | |
37cc07dd | 249 | addReplySds(c,sdsnew("-IOERR error or timeout reading from target node\r\n")); |
571e257d | 250 | sdsfree(cmd.io.buffer.ptr); |
251 | close(fd); | |
252 | return; | |
253 | } | |
8e2a225a | 254 | |
b0aa9bc8 | 255 | void stopKeyArchive(void) { |
b0aa9bc8 | 256 | redisAssert(env != NULL); |
8e2a225a | 257 | |
b0aa9bc8 JF |
258 | mdb_dbi_close(env, dbi); |
259 | mdb_env_close(env); | |
260 | env = NULL; | |
261 | ||
262 | server.mdb_state = REDIS_MDB_OFF; | |
263 | } | |
264 | ||
265 | int startKeyArchive(void) { | |
b0aa9bc8 JF |
266 | redisAssert(env == NULL); |
267 | ||
268 | int ret; | |
8e2a225a JF |
269 | |
270 | ret = mdb_env_create(&env); | |
b0aa9bc8 | 271 | if (ret != 0) return ret; |
8e2a225a JF |
272 | |
273 | ret = mdb_env_set_mapsize(env, server.mdb_mapsize); | |
b0aa9bc8 | 274 | if (ret != 0) return ret; |
8e2a225a JF |
275 | |
276 | ret = mdb_env_set_maxdbs(env, 1); | |
b0aa9bc8 | 277 | if (ret != 0) return ret; |
8e2a225a JF |
278 | |
279 | mkdir(server.mdb_environment, 0644); | |
280 | ||
281 | ret = mdb_env_open(env, server.mdb_environment, MDB_FIXEDMAP | MDB_NOSYNC, 0664); | |
b0aa9bc8 | 282 | if (ret != 0) return ret; |
8e2a225a JF |
283 | |
284 | MDB_txn *txn; | |
285 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
b0aa9bc8 | 286 | if (ret != 0) return ret; |
8e2a225a JF |
287 | |
288 | ret = mdb_open(txn, NULL, 0, &dbi); | |
b0aa9bc8 | 289 | if (ret != 0) return ret; |
8e2a225a JF |
290 | |
291 | mdb_txn_commit(txn); | |
b0aa9bc8 JF |
292 | |
293 | server.mdb_state = REDIS_MDB_ON; | |
294 | return 0; | |
8e2a225a JF |
295 | } |
296 | ||
297 | int archive(redisDb *db, robj *key) { | |
298 | if (server.mdb_state == REDIS_MDB_OFF) | |
299 | return 1; | |
b0aa9bc8 | 300 | redisAssert(env != NULL); |
8e2a225a JF |
301 | |
302 | MDB_val kval; | |
303 | kval.mv_data = key->ptr; | |
304 | kval.mv_size = sdslen((sds)key->ptr); | |
305 | ||
306 | robj *object; | |
307 | object = lookupKey(db, key); | |
308 | if (object == NULL) | |
309 | return 0; | |
310 | ||
311 | if (object->archived != 0) | |
5290f05c | 312 | return 1; |
8e2a225a JF |
313 | |
314 | rio payload; | |
315 | createDumpPayload(&payload, object); | |
316 | ||
317 | MDB_val dval; | |
318 | dval.mv_size = sdslen(payload.io.buffer.ptr); | |
319 | dval.mv_data = payload.io.buffer.ptr; | |
320 | ||
321 | int ret; | |
322 | ||
323 | MDB_txn *txn; | |
324 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
325 | if (ret != 0) | |
326 | goto archive_err; | |
327 | ||
328 | ret = mdb_put(txn, dbi, &kval, &dval, 0); | |
329 | if (ret != 0) { | |
330 | mdb_txn_abort(txn); | |
331 | goto archive_err; | |
332 | } | |
333 | ||
334 | mdb_txn_commit(txn); | |
335 | sdsfree(payload.io.buffer.ptr); | |
336 | return 1; | |
337 | ||
338 | archive_err: | |
339 | sdsfree(payload.io.buffer.ptr); | |
340 | redisAssert(0); | |
341 | return 0; | |
342 | } | |
343 | ||
344 | robj *recover(redisDb *db, robj *key) { | |
345 | if (server.mdb_state == REDIS_MDB_OFF) | |
346 | return NULL; | |
347 | ||
348 | int ret; | |
349 | ||
8e2a225a JF |
350 | MDB_val kval; |
351 | kval.mv_data = key->ptr; | |
352 | kval.mv_size = sdslen((sds)key->ptr); | |
353 | ||
354 | MDB_txn *txn; | |
355 | ret = mdb_txn_begin(env, NULL, 0, &txn); | |
356 | if (ret != 0) | |
357 | return NULL; | |
358 | ||
359 | MDB_cursor *cursor; | |
360 | ret = mdb_cursor_open(txn, dbi, &cursor); | |
361 | if (ret != 0) { | |
362 | mdb_txn_abort(txn); | |
363 | return NULL; | |
364 | } | |
365 | ||
366 | MDB_val pval; | |
367 | ret = mdb_cursor_get(cursor, &kval, &pval, MDB_SET); | |
368 | if (ret != 0) { | |
369 | mdb_txn_abort(txn); | |
370 | return NULL; | |
371 | } | |
372 | ||
373 | sds sval = sdsnewlen(pval.mv_data, pval.mv_size); | |
374 | mdb_cursor_close(cursor); | |
375 | mdb_txn_abort(txn); | |
376 | ||
377 | rio payload; | |
378 | rioInitWithBuffer(&payload, sval); | |
379 | ||
380 | int type = rdbLoadObjectType(&payload); | |
381 | if (type == -1) | |
382 | goto recover_err; | |
383 | ||
384 | robj *object = rdbLoadObject(type, &payload); | |
385 | if (object == NULL) | |
386 | goto recover_err; | |
387 | ||
388 | object->archived = 1; | |
389 | ||
390 | dbAdd(db, key, object); | |
391 | signalModifiedKey(db, key); | |
392 | server.dirty++; | |
393 | ||
394 | sdsfree(sval); | |
395 | return object; | |
396 | ||
397 | recover_err: | |
398 | sdsfree(sval); | |
399 | return NULL; | |
400 | } |