]>
Commit | Line | Data |
---|---|---|
51e135ce A |
1 | /* |
2 | * Copyright (c) 2010-2012 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <stdio.h> | |
30 | #include <stddef.h> | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | #include <limits.h> | |
34 | #include <err.h> | |
35 | #include <errno.h> | |
36 | #include <fcntl.h> | |
37 | #include <unistd.h> | |
38 | #include <stdarg.h> | |
39 | #include <sys/types.h> | |
40 | #include <sys/param.h> | |
41 | #include <sys/stat.h> | |
42 | #include <sys/ioctl.h> | |
43 | #include <sys/disk.h> | |
44 | #include <sys/param.h> | |
45 | ||
46 | #include "../fsck_hfs.h" | |
47 | #include "fsck_journal.h" | |
48 | ||
41dcebd9 A |
/*
 * Compile-time switch for the verbose journal-replay trace messages in
 * this file.  It defaults to 0 (trace messages compiled out) but can be
 * overridden from the build command line, e.g. -DDEBUG_JOURNAL=1.
 * The trace messages are additionally gated at run time by "debug".
 */
#ifndef DEBUG_JOURNAL
#define DEBUG_JOURNAL 0
#endif

/* Run-time debug flag, defined outside this file; tested before diagnostics are logged. */
extern char debug;
52 | ||
53 | #include <hfs/hfs_format.h> | |
54 | #include <libkern/OSByteOrder.h> | |
55 | ||
56 | typedef struct SwapType { | |
57 | const char *name; | |
58 | uint16_t (^swap16)(uint16_t); | |
59 | uint32_t (^swap32)(uint32_t); | |
60 | uint64_t (^swap64)(uint64_t); | |
61 | } swapper_t; | |
62 | ||
63 | static swapper_t nativeEndian = { | |
64 | "native endian", | |
65 | ^(uint16_t x) { return x; }, | |
66 | ^(uint32_t x) { return x; }, | |
67 | ^(uint64_t x) { return x; } | |
68 | }; | |
69 | ||
70 | static swapper_t swappedEndian = { | |
71 | "swapped endian", | |
72 | ^(uint16_t x) { return OSSwapInt16(x); }, | |
73 | ^(uint32_t x) { return OSSwapInt32(x); }, | |
74 | ^(uint64_t x) { return OSSwapInt64(x); } | |
75 | }; | |
76 | ||
77 | typedef int (^journal_write_block_t)(off_t, void *, size_t); | |
78 | ||
/*
 * Compute the simple rolling checksum used for the journal header and for
 * the block list header at the start of each transaction.
 *
 * Each byte (taken as unsigned) is folded into the running value as
 *     sum = (sum << 8) ^ (sum + byte)
 * and the bitwise complement of the final value is returned.  A length of
 * zero (or less) therefore yields ~0.
 */
static uint32_t
calc_checksum(char *ptr, int len)
{
	const unsigned char *bytes = (const unsigned char *)ptr;
	uint32_t sum = 0;
	int remaining = len;

	while (remaining > 0) {
		uint32_t byte = *bytes++;

		sum = (sum << 8) ^ (sum + byte);
		remaining--;
	}

	return ~sum;
}
97 | ||
/*
 * Book-keeping for reading the journal, which is treated as a circular
 * buffer that begins just past the journal header.
 */
typedef struct JournalIOInfo {
	/* File descriptor the journal buffer is read from. */
	int		jfd;
	/* Bumped each time the read position wraps back to "base". */
	int		wrapCount;
	/* Block size; journal I/O is done in units of this size. */
	size_t		bSize;
	/* Offset of the start of the journal buffer (i.e. past the header). */
	uint64_t	base;
	/* Size of the journal buffer (journal size minus the header size). */
	uint64_t	size;
	/* End of the journal; initially the "end" field from the journal header. */
	uint64_t	end;
	/* Current read offset; begins at the header's "start". */
	uint64_t	current;
} JournalIOInfo_t;
107 | ||
108 | /* | |
109 | * Attempt to read <length> bytes from the journal buffer. | |
110 | * Since this is a wrapped buffer, it may have to start at the | |
111 | * beginning. info->{base, size, end} are read-only; info->current | |
112 | * is updated with the current offset. It returns the number of bytes | |
113 | * it read, or -1 on error. | |
114 | */ | |
115 | static ssize_t | |
116 | journalRead(JournalIOInfo_t *info, uint8_t *buffer, size_t length) | |
117 | { | |
118 | size_t nread = 0; | |
119 | uint8_t *ptr = buffer; | |
120 | ||
121 | // fprintf(stderr, "%s(%p, %p, %zu)\n", __FUNCTION__, info, buffer, length); | |
122 | if (info->wrapCount > 1) { | |
123 | fplog(stderr, "%s(%p, %p, %zu): journal buffer wrap count = %d\n", __FUNCTION__, info, buffer, length, info->wrapCount); | |
124 | return -1; | |
125 | } | |
126 | while (nread < length) { | |
127 | off_t end; | |
128 | size_t amt; | |
129 | ssize_t n; | |
130 | ||
131 | if (info->end < info->current) { | |
132 | // It wraps, so we max out at bse+size | |
133 | end = info->base + info->size; | |
134 | } else { | |
135 | end = info->end; | |
136 | } | |
137 | amt = MIN((length - nread), (end - info->current)); | |
138 | if (amt == 0) { | |
139 | if (debug) { | |
140 | fplog(stderr, "Journal read amount is 0, is that right?\n"); | |
141 | } | |
142 | goto done; | |
143 | } | |
144 | ||
145 | n = pread(info->jfd, ptr, amt, info->current); | |
146 | if (n == -1) { | |
147 | warn("pread(%d, %p, %zu, %llu)", info->jfd, ptr, amt, info->current); | |
148 | goto done; | |
149 | } | |
150 | if (n != amt) { | |
151 | if (debug) { | |
152 | fplog(stderr, "%s(%d): Wanted to read %zu, but only read %zd\n", __FUNCTION__, __LINE__, amt, n); | |
153 | } | |
154 | } | |
155 | nread += n; | |
156 | ptr += n; | |
157 | info->current += n; | |
158 | if (info->current == (info->base + info->size)) { | |
159 | info->current = info->base; | |
160 | info->wrapCount++; | |
161 | } | |
162 | } | |
163 | done: | |
164 | return nread; | |
165 | } | |
166 | ||
167 | /* | |
168 | * Read a transaction from the journal buffer. | |
169 | * A transaction is a list of block_list_headers, and their | |
170 | * associated data. It needs to read all of the block_lists in | |
171 | * a transaction, or it fails. It returns NULL if there are | |
172 | * no transactions, and on error. (Maybe that should change?) | |
173 | */ | |
174 | static block_list_header * | |
175 | getJournalTransaction(JournalIOInfo_t *jinfo, swapper_t *swap) | |
176 | { | |
177 | block_list_header *retval = NULL; | |
178 | uint8_t block[jinfo->bSize]; | |
179 | block_list_header *hdr = (void*)█ | |
180 | ssize_t nread; | |
181 | ssize_t amt; | |
182 | ||
183 | memset(block, 0, sizeof(block)); | |
184 | nread = journalRead(jinfo, block, sizeof(block)); | |
185 | if (nread == -1 || | |
186 | (size_t)nread != sizeof(block)) { | |
187 | if (debug) | |
188 | plog("%s: wanted %zd, got %zd\n", __FUNCTION__, sizeof(block), nread); | |
189 | return NULL; | |
190 | } | |
191 | if (swap->swap32(hdr->num_blocks) == 0) { | |
192 | /* | |
193 | * Either there really are no blocks, or this is not a valid | |
194 | * transaction. Either way, there's nothing for us to do here. | |
195 | */ | |
41dcebd9 | 196 | #if DEBUG_JOURNAL |
51e135ce A |
197 | if (debug) |
198 | fplog(stderr, "%s(%d): hdr->num_blocks == 0\n", __FUNCTION__, __LINE__); | |
41dcebd9 | 199 | #endif |
51e135ce A |
200 | return NULL; |
201 | } | |
202 | /* | |
203 | * Now we check the checksum to see if this is a valid header. | |
204 | * Note that we verify the checksum before reading any more -- if | |
205 | * it's not a valid header, we don't want to read more than a block | |
206 | * size. | |
207 | */ | |
208 | uint32_t tmpChecksum = swap->swap32(hdr->checksum); | |
209 | uint32_t compChecksum; | |
210 | hdr->checksum = 0; | |
211 | compChecksum = calc_checksum((void*)hdr, sizeof(*hdr)); | |
212 | hdr->checksum = swap->swap32(tmpChecksum); | |
213 | ||
214 | if (compChecksum != tmpChecksum) { | |
215 | if (debug) | |
216 | fplog(stderr, "%s(%d): hdr has bad checksum, returning NULL\n", __FUNCTION__, __LINE__); | |
217 | return NULL; | |
218 | } | |
219 | ||
220 | if (swap->swap32(hdr->bytes_used) < sizeof(block)) { | |
41dcebd9 | 221 | #if DEBUG_JOURNAL |
51e135ce A |
222 | if (debug) { |
223 | fplog(stderr, "%s(%d): hdr has bytes_used (%u) less than sizeof block (%zd)\n", | |
224 | __FUNCTION__, __LINE__, swap->swap32(hdr->bytes_used), sizeof(block)); | |
225 | } | |
41dcebd9 | 226 | #endif |
51e135ce A |
227 | return NULL; |
228 | } | |
229 | ||
230 | retval = malloc(swap->swap32(hdr->bytes_used)); | |
231 | if (retval == NULL) | |
232 | return NULL; | |
233 | ||
234 | memset(retval, 0, swap->swap32(hdr->bytes_used)); | |
235 | memcpy(retval, block, sizeof(block)); | |
236 | amt = swap->swap32(hdr->bytes_used) - sizeof(block); | |
237 | nread = journalRead(jinfo, ((uint8_t*)retval) + sizeof(block), amt); | |
238 | if (nread != amt) { | |
239 | free(retval); | |
240 | return NULL; | |
241 | } | |
242 | ||
243 | return retval; | |
244 | } | |
245 | ||
246 | /* | |
247 | * Replay a transaction. | |
248 | * Transactions have a blockListSize amount of block_list_header, and | |
249 | * are then followed by data. We read it in, verify the checksum, and | |
250 | * if it's good, we call the block that was passed in to do something | |
251 | * with it. Maybe write it out. Maybe laugh about it. | |
252 | * | |
253 | * It returns -1 if there was an error before it wrote anything out, | |
254 | * and -2 if there was an error after it wrote something out. | |
255 | * | |
256 | * The arguments are: | |
257 | * txn -- a block_list_header pointer, which has the description and data | |
258 | * to be replayed. | |
259 | * blSize -- the size of the block_list for this journal. (The data | |
260 | * are after the block_list, but part of the same buffer.) | |
261 | * blkSize -- The block size used to convert block numbers to offsets. This | |
262 | * is defined to be the size of the journal header. | |
263 | * swap -- A pointer to a swapper_t used to swap journal data structure elements. | |
264 | * writer -- A block-of-code that does writing. | |
265 | * | |
266 | * "writer" should return -1 to stop the replay (this propagates an error up). | |
267 | */ | |
268 | static int | |
269 | replayTransaction(block_list_header *txn, size_t blSize, size_t blkSize, swapper_t *swap, journal_write_block_t writer) | |
270 | { | |
271 | uint32_t i; | |
272 | uint8_t *endPtr = ((uint8_t*)txn) + swap->swap32(txn->bytes_used); | |
273 | uint8_t *dataPtr = ((uint8_t*)txn) + blSize; | |
274 | int retval = -1; | |
275 | for (i = 1; i < swap->swap32(txn->num_blocks); i++) { | |
41dcebd9 | 276 | #if DEBUG_JOURNAL |
51e135ce A |
277 | if (debug) |
278 | plog("\tBlock %d: blkNum %llu, size %u, data offset = %zd\n", i, swap->swap64(txn->binfo[i].bnum), swap->swap32(txn->binfo[i].bsize), dataPtr - (uint8_t*)txn); | |
41dcebd9 | 279 | #endif |
51e135ce A |
280 | /* |
281 | * XXX | |
282 | * Check with security types on these checks. Need to ensure | |
283 | * that the fields don't take us off into the dark scary woods. | |
284 | * It's mostly the second one that I am unsure about. | |
285 | */ | |
286 | if (dataPtr > endPtr) { | |
287 | if (debug) | |
288 | plog("\tData out of range for block_list_header\n"); | |
289 | return retval; | |
290 | } | |
291 | if ((endPtr - dataPtr) < swap->swap32(txn->binfo[i].bsize)) { | |
292 | if (debug) | |
293 | plog("\tData size for block %d out of range for block_list_header\n", i); | |
294 | return retval; | |
295 | } | |
296 | if ((dataPtr + swap->swap32(txn->binfo[i].bsize)) > endPtr) { | |
297 | if (debug) | |
298 | plog("\tData end out of range for block_list_header\n"); | |
299 | return retval; | |
300 | } | |
41dcebd9 | 301 | #if DEBUG_JOURNAL |
51e135ce A |
302 | // Just for debugging |
303 | if (debug) { | |
304 | if (swap->swap64(txn->binfo[i].bnum) == 2) { | |
305 | HFSPlusVolumeHeader *vp = (void*)dataPtr; | |
306 | plog("vp->signature = %#x, version = %#x\n", vp->signature, vp->version); | |
307 | } | |
308 | } | |
41dcebd9 | 309 | #endif |
51e135ce A |
310 | // It's in the spec, and I saw it come up once on a live volume. |
311 | if (swap->swap64(txn->binfo[i].bnum) == ~(uint64_t)0) { | |
41dcebd9 | 312 | #if DEBUG_JOURNAL |
51e135ce A |
313 | if (debug) |
314 | plog("\tSkipping this block due to magic skip number\n"); | |
41dcebd9 | 315 | #endif |
51e135ce A |
316 | } else { |
317 | // Should we set retval to -2 here? | |
318 | if (writer) { | |
319 | if ((writer)(swap->swap64(txn->binfo[i].bnum) * blkSize, dataPtr, swap->swap32(txn->binfo[i].bsize)) == -1) | |
320 | return retval; | |
321 | } | |
322 | } | |
323 | dataPtr += swap->swap32(txn->binfo[i].bsize); | |
324 | retval = -2; | |
325 | } | |
326 | return 0; | |
327 | } | |
328 | ||
329 | /* | |
330 | * Read a journal header in from the journal device. | |
331 | */ | |
332 | static int | |
333 | loadJournalHeader(int jfd, off_t offset, size_t blockSize, journal_header *jhp) | |
334 | { | |
335 | uint8_t buffer[blockSize]; | |
336 | ssize_t nread; | |
337 | ||
338 | nread = pread(jfd, buffer, sizeof(buffer), offset); | |
339 | if (nread == -1 || | |
340 | (size_t)nread != sizeof(buffer)) { | |
341 | warn("tried to read %zu for journal header buffer, got %zd", sizeof(buffer), nread); | |
342 | return -1; | |
343 | } | |
344 | *jhp = *(journal_header*)buffer; | |
345 | return 0; | |
346 | } | |
347 | ||
348 | /* | |
349 | * Replay a journal (called "journal_open" because you have to | |
350 | * to replay it as part of opening it). At this point, all it | |
351 | * is useful for is replaying the journal. | |
352 | * | |
353 | * It is passed in: | |
354 | * jfd -- file descriptor for the journal device | |
355 | * offset -- offset (in bytes) of the journal on the journal device | |
356 | * journal_size -- size of the jorunal (in bytes) | |
357 | * min_fs_blksize -- Blocksize of the data filesystem | |
358 | * flags -- unused for now | |
359 | * jdev_name -- string name for the journal device. used for logging. | |
360 | * do_write_b -- a block which does the actual writing. | |
361 | * | |
362 | * Currently, for fsck_hfs, the do_write_b block writes to the cache. It could also | |
363 | * just print out the block numbers, or just check their integrity, as much as is | |
364 | * possible. | |
365 | * | |
366 | * The function works by loading the journal header. From there, it then starts | |
367 | * loading transactions, via block_list_header groups. When it gets to the end | |
368 | * of the journal, it tries continuing, in case there were transactions that | |
369 | * didn't get updated in the header (this apparently happens). | |
370 | * | |
371 | * It returns 0 on success, and -1 on error. Note that there's not a lot | |
372 | * fsck_hfs can probably do in the event of error. | |
373 | * | |
374 | */ | |
375 | int | |
376 | journal_open(int jfd, | |
377 | off_t offset, // Offset of journal | |
378 | off_t journal_size, // Size, in bytes, of the entire journal | |
379 | size_t min_fs_blksize, // Blocksize of the data filesystem, journal blocksize must be at least this size | |
380 | uint32_t flags __unused, // Not used in this implementation | |
381 | const char *jdev_name, // The name of the journal device, for logging | |
382 | int (^do_write_b)(off_t, void*, size_t)) | |
383 | { | |
384 | journal_header jhdr = { 0 }; | |
385 | swapper_t *jnlSwap; // Used to swap fields of the journal | |
386 | uint32_t tempCksum; // Temporary checksum value | |
387 | uint32_t jBlkSize = 0; | |
388 | ||
389 | if (ioctl(jfd, DKIOCGETBLOCKSIZE, &jBlkSize) == -1) { | |
927b7b56 | 390 | jBlkSize = (uint32_t)min_fs_blksize; |
51e135ce A |
391 | } else { |
392 | if (jBlkSize < min_fs_blksize) { | |
393 | fplog(stderr, "%s: journal block size %u < min block size %zu for %s\n", __FUNCTION__, jBlkSize, min_fs_blksize, jdev_name); | |
394 | return -1; | |
395 | } | |
396 | if ((jBlkSize % min_fs_blksize) != 0) { | |
397 | fplog(stderr, "%s: journal block size %u is not a multiple of fs block size %zu for %s\n", __FUNCTION__, jBlkSize, min_fs_blksize, jdev_name); | |
398 | return -1; | |
399 | } | |
400 | } | |
401 | if (loadJournalHeader(jfd, offset, jBlkSize, &jhdr) != 0) { | |
402 | fplog(stderr, "%s: unable to load journal header from %s\n", __FUNCTION__, jdev_name); | |
403 | return -1; | |
404 | } | |
405 | ||
406 | /* | |
407 | * Unlike the rest of the filesystem, the journal can be in native or | |
408 | * non-native byte order. Barring moving a filesystem from one host | |
409 | * to another, it'll almost always be in native byte order. | |
410 | */ | |
411 | if (jhdr.endian == ENDIAN_MAGIC) { | |
412 | jnlSwap = &nativeEndian; | |
413 | } else if (OSSwapInt32(jhdr.endian) == ENDIAN_MAGIC) { | |
414 | jnlSwap = &swappedEndian; | |
415 | } else { | |
416 | fplog(stderr, "%s: Unknown journal endian magic number %#x from %s\n", __FUNCTION__, jhdr.endian, jdev_name); | |
417 | return -1; | |
418 | } | |
419 | /* | |
420 | * Two different magic numbers are valid. | |
421 | * Do they mean different thigs, though? | |
422 | */ | |
423 | if (jnlSwap->swap32(jhdr.magic) != JOURNAL_HEADER_MAGIC && | |
424 | jnlSwap->swap32(jhdr.magic) != OLD_JOURNAL_HEADER_MAGIC) { | |
425 | fplog(stderr, "%s: Unknown journal header magic number %#x from %s\n", __FUNCTION__, jhdr.magic, jdev_name); | |
426 | return -1; | |
427 | } | |
428 | ||
429 | /* | |
430 | * Checksums have to be done with the checksum field set to 0. | |
431 | * So we have to stash it aside for a bit, and set the field to | |
432 | * 0, before we can compare. Afterwards, if it compares correctly, | |
433 | * we put the original (swapped, if necessary) value back, just | |
434 | * in case. | |
435 | */ | |
436 | tempCksum = jnlSwap->swap32(jhdr.checksum); | |
437 | jhdr.checksum = 0; | |
438 | if (jnlSwap->swap32(jhdr.magic) == JOURNAL_HEADER_MAGIC && | |
439 | (calc_checksum((void*)&jhdr, JOURNAL_HEADER_CKSUM_SIZE) != tempCksum)) { | |
440 | fplog(stderr, "%s: Invalid journal checksum from %s\n", __FUNCTION__, jdev_name); | |
441 | return -1; | |
442 | } | |
443 | jhdr.checksum = jnlSwap->swap32(tempCksum); | |
444 | ||
445 | /* | |
446 | * Set up information about the journal which we use to do the I/O. | |
447 | * The journal is a circular buffer. However, the start of the journal | |
448 | * buffer is past the journal header. See the JournalIOInfo structure above. | |
449 | */ | |
450 | off_t startOffset = jnlSwap->swap64(jhdr.start); | |
451 | off_t endOffset =jnlSwap->swap64(jhdr.end); | |
452 | off_t journalStart = offset + jnlSwap->swap32(jhdr.jhdr_size); | |
453 | ||
454 | /* | |
455 | * The journal code was updated to be able to read past the "end" of the journal, | |
456 | * to see if there were any valid transactions there. If we are peeking past the | |
457 | * end, we don't care if we have checksum errors -- that just means they're not | |
458 | * valid transactions. | |
459 | * | |
460 | */ | |
461 | int into_the_weeds = 0; | |
462 | uint32_t last_sequence_number = 0; | |
463 | ||
464 | JournalIOInfo_t jinfo = { 0 }; | |
465 | ||
41dcebd9 | 466 | #if DEBUG_JOURNAL |
51e135ce A |
467 | if (debug) |
468 | plog("Journal start sequence number = %u\n", jnlSwap->swap32(jhdr.sequence_num)); | |
41dcebd9 | 469 | #endif |
51e135ce A |
470 | |
471 | /* | |
472 | * Now set up the JournalIOInfo object with the file descriptor, | |
473 | * the block size, start and end of the journal buffer, and where | |
474 | * the journal pointer currently is. | |
475 | */ | |
476 | jinfo.jfd = jfd; | |
477 | jinfo.bSize = jnlSwap->swap32(jhdr.jhdr_size); | |
478 | jinfo.base = journalStart; | |
479 | jinfo.size = journal_size - jinfo.bSize; | |
480 | jinfo.end = offset + endOffset; | |
481 | jinfo.current = offset + startOffset; | |
482 | ||
483 | const char *state = ""; | |
484 | int bad_journal = 0; | |
485 | block_list_header *txn = NULL; | |
486 | ||
487 | /* | |
488 | * Loop while getting transactions. We exit when we hit a checksum | |
489 | * error, or when the sequence number for a transaction doesn't match | |
490 | * what we expect it to. (That's the trickiest part -- the into_the_weeds | |
491 | * portion of the code. It doesn't match the TN11150 documentation, so | |
492 | * I've had to go by both my experience with real-world journals and by | |
493 | * looking at the kernel code.) | |
494 | */ | |
495 | while (1) { | |
496 | int rv; | |
497 | ||
498 | if (jinfo.current == jinfo.end && into_the_weeds == 0) { | |
499 | /* | |
500 | * This is a bit weird, but it works: if current == end, but gone_into_weeds is 1, | |
501 | * then this code will not execute. If it does execute, it'll go to get a transaction. | |
502 | * That will put the pointer past end. | |
503 | */ | |
504 | if (jhdr.sequence_num == 0) { | |
505 | /* | |
506 | * XXX | |
507 | * I am not sure about this; this behaviour is not in TN1150 at all, | |
508 | * but I _think_ this is what the kernel is doing. | |
7adaf79d A |
509 | */ |
510 | #if DEBUG_JOURNAL | |
511 | if (debug) | |
512 | plog("Journal sequence number is 0, is going into the end okay?\n"); | |
513 | #endif | |
51e135ce A |
514 | } |
515 | into_the_weeds = 1; | |
41dcebd9 | 516 | #if DEBUG_JOURNAL |
51e135ce A |
517 | if (debug) |
518 | plog("Attempting to read past stated end of journal\n"); | |
41dcebd9 | 519 | #endif |
51e135ce A |
520 | state = "tentative "; |
521 | jinfo.end = (jinfo.base + startOffset - jinfo.bSize); | |
522 | continue; | |
523 | } | |
41dcebd9 | 524 | #if DEBUG_JOURNAL |
51e135ce A |
525 | if (debug) |
526 | plog("Before getting %stransaction: jinfo.current = %llu\n", state, jinfo.current); | |
41dcebd9 | 527 | #endif |
51e135ce A |
528 | /* |
529 | * Note that getJournalTransaction verifies the checksum on the block_list_header, so | |
530 | * if it's bad, it'll return NULL. | |
531 | */ | |
532 | txn = getJournalTransaction(&jinfo, jnlSwap); | |
533 | if (txn == NULL) { | |
41dcebd9 | 534 | #if DEBUG_JOURNAL |
51e135ce A |
535 | if (debug) |
536 | plog("txn is NULL, jinfo.current = %llu\n", jinfo.current); | |
41dcebd9 | 537 | #endif |
51e135ce | 538 | if (into_the_weeds) { |
41dcebd9 | 539 | #if DEBUG_JOURNAL |
51e135ce A |
540 | if (debug) |
541 | plog("\tBut we do not care, since it is past the end of the journal\n"); | |
41dcebd9 | 542 | #endif |
51e135ce A |
543 | } else { |
544 | bad_journal = 1; | |
545 | } | |
546 | break; | |
547 | } | |
41dcebd9 | 548 | #if DEBUG_JOURNAL |
51e135ce A |
549 | if (debug) { |
550 | plog("After getting %stransaction: jinfo.current = %llu\n", state, jinfo.current); | |
551 | plog("%stxn = { %u max_blocks, %u num_blocks, %u bytes_used, binfo[0].next = %u }\n", state, jnlSwap->swap32(txn->max_blocks), jnlSwap->swap32(txn->num_blocks), jnlSwap->swap32(txn->bytes_used), jnlSwap->swap32(txn->binfo[0].next)); | |
552 | } | |
41dcebd9 | 553 | #endif |
51e135ce A |
554 | if (into_the_weeds) { |
555 | /* | |
556 | * This seems to be what the kernel was checking: if the | |
557 | * last_sequence_number was set, and the txn sequence number | |
558 | * is set, and the txn sequence number doesn't match either | |
559 | * last_sequence_number _or_ an incremented version of it, then | |
560 | * the transaction isn't worth looking at, and we've reached | |
561 | * the end of the journal. | |
562 | */ | |
563 | if (last_sequence_number != 0 && | |
564 | txn->binfo[0].next != 0 && | |
565 | jnlSwap->swap32(txn->binfo[0].next) != last_sequence_number && | |
566 | jnlSwap->swap32(txn->binfo[0].next) != (last_sequence_number + 1)) { | |
567 | // Probably not a valid transaction | |
41dcebd9 | 568 | #if DEBUG_JOURNAL |
51e135ce A |
569 | if (debug) |
570 | plog("\tTentative txn sequence %u is not expected %u, stopping journal replay\n", jnlSwap->swap32(txn->binfo[0].next), last_sequence_number + 1); | |
41dcebd9 | 571 | #endif |
51e135ce A |
572 | break; |
573 | } | |
574 | } | |
575 | /* | |
576 | * If we've got a valid transaction, then we replay it. | |
577 | * If there was an error, we're done with the journal replay. | |
578 | * (If the error occurred after the "end," then we don't care, | |
579 | * and it's not a bad journal.) | |
580 | */ | |
581 | rv = replayTransaction(txn, | |
582 | jnlSwap->swap32(jhdr.blhdr_size), | |
583 | jnlSwap->swap32(jhdr.jhdr_size), | |
584 | jnlSwap, | |
585 | do_write_b); | |
586 | ||
587 | if (rv < 0) { | |
588 | if (debug) | |
589 | plog("\tTransaction replay failed, returned %d\n", rv); | |
590 | if (into_the_weeds) { | |
591 | if (debug) | |
592 | plog("\t\tAnd we don't care\n"); | |
593 | } else { | |
594 | bad_journal = 1; | |
595 | } | |
596 | break; | |
597 | } | |
598 | last_sequence_number = jnlSwap->swap32(txn->binfo[0].next); | |
599 | free(txn); | |
600 | txn = NULL; | |
601 | } | |
602 | if (txn) | |
603 | free(txn); | |
604 | if (bad_journal) { | |
605 | if (debug) | |
606 | plog("Journal was bad, stopped replaying\n"); | |
607 | return -1; | |
608 | } | |
609 | ||
610 | return 0; | |
611 | } |