]>
Commit | Line | Data |
---|---|---|
de8ee011 A |
1 | // |
2 | // lf_hfs_btrees_io.c | |
3 | // livefiles_hfs | |
4 | // | |
5 | // Created by Yakov Ben Zaken on 22/03/2018. | |
6 | // | |
7 | ||
8 | #include <sys/mount.h> | |
9 | #include "lf_hfs_btrees_io.h" | |
10 | #include "lf_hfs.h" | |
11 | #include "lf_hfs_xattr.h" | |
12 | #include "lf_hfs_cnode.h" | |
13 | #include "lf_hfs_endian.h" | |
14 | #include "lf_hfs_btrees_io.h" | |
15 | #include "lf_hfs_utils.h" | |
16 | #include "lf_hfs_file_mgr_internal.h" | |
17 | #include "lf_hfs_btrees_private.h" | |
18 | #include "lf_hfs_generic_buf.h" | |
19 | #include "lf_hfs_vfsutils.h" | |
20 | #include "lf_hfs_vfsops.h" | |
21 | #include "lf_hfs_readwrite_ops.h" | |
22 | #include "lf_hfs_file_extent_mapping.h" | |
23 | #include "lf_hfs_vnops.h" | |
24 | #include "lf_hfs_journal.h" | |
25 | ||
26 | static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount); | |
27 | static int btree_journal_modify_block_end(struct hfsmount *hfsmp, GenericLFBuf *bp); | |
28 | void btree_swap_node(GenericLFBuf *bp, __unused void *arg); | |
29 | ||
30 | /* | |
31 | * Return btree node size for given vnode. | |
32 | * | |
33 | * Returns: | |
34 | * For btree vnode, returns btree node size. | |
35 | * For non-btree vnodes, returns 0. | |
36 | */ | |
37 | u_int16_t get_btree_nodesize(struct vnode *vp) | |
38 | { | |
39 | BTreeControlBlockPtr btree; | |
40 | u_int16_t node_size = 0; | |
41 | ||
42 | if (vnode_issystem(vp)) { | |
43 | btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr; | |
44 | if (btree) { | |
45 | node_size = btree->nodeSize; | |
46 | } | |
47 | } | |
48 | ||
49 | return node_size; | |
50 | } | |
51 | ||
52 | OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount) | |
53 | { | |
54 | BTreeControlBlockPtr bTreePtr; | |
55 | ||
56 | hfs_assert(vp != NULL); | |
57 | hfs_assert(blockSize >= kMinNodeSize); | |
58 | if (blockSize > MAXBSIZE ) | |
59 | return (fsBTBadNodeSize); | |
60 | ||
61 | bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr; | |
62 | bTreePtr->nodeSize = blockSize; | |
63 | ||
64 | return (E_NONE); | |
65 | } | |
66 | ||
67 | ||
/*
 * GetBTreeBlock
 *
 * Fetch a single B-tree node into *block: either read it from disk
 * (default) or hand back a freshly allocated buffer that is NOT read
 * (kGetEmptyBlock).  On success the node has been endian-checked and,
 * when necessary, swapped to host order.  On failure the buffer is
 * released and block->blockHeader / block->buffer are NULLed.
 */
OSStatus GetBTreeBlock(FileReference vp, uint64_t blockNum, GetBlockOptions options, BlockDescriptor *block)
{
    OSStatus retval = E_NONE;
    GenericLFBufPtr bp = NULL;
    u_int8_t allow_empty_node;

    /* If the btree block is being read using hint, it is
     * fine for the swap code to find zeroed out nodes.
     */
    if (options & kGetBlockHint) {
        allow_empty_node = true;
    } else {
        allow_empty_node = false;
    }

    if (options & kGetEmptyBlock) {
        daddr64_t blkno;
        off_t offset;

        offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize;
        /* GEN_BUF_IS_UPTODATE: no disk read will back-fill this buffer.
         * GEN_BUF_LITTLE_ENDIAN: contents are treated as host-order data. */
        bp = lf_hfs_generic_buf_allocate(vp, blockNum, (uint32_t)block->blockSize, GEN_BUF_IS_UPTODATE | GEN_BUF_LITTLE_ENDIAN);
        /* NOTE(review): blkno and the blockmap result are unused and the
         * if-body is empty — presumably the call just primes/validates the
         * file-offset -> device-block mapping; confirm before relying on it. */
        if (bp && !hfs_vnop_blockmap(&(struct vnop_blockmap_args){
            .a_vp = vp,
            .a_foffset = offset,
            .a_size = block->blockSize,
            .a_bpn = &blkno
        })) {
        }
    } else {
        bp = lf_hfs_generic_buf_allocate(vp, blockNum, (uint32_t)block->blockSize, 0);
        retval = lf_hfs_generic_buf_read( bp );
    }
    if (bp == NULL)
        retval = -1; //XXX need better error

    if (retval == E_NONE) {
        /* Publish the buffer through the caller's block descriptor. */
        block->blockHeader = bp;
        block->buffer = bp->pvData;
        block->blockNum = bp->uBlockN;
        /* GEN_BUF_LITTLE_ENDIAN set means the data is already host-order,
         * i.e. it did NOT just come off disk in big-endian form. */
        block->blockReadFromDisk = !(bp->uCacheFlags & GEN_BUF_LITTLE_ENDIAN);

        // XXXdbg
        block->isModified = 0;

        /* Check and endian swap B-Tree node (only if it's a valid block) */
        if (!(options & kGetEmptyBlock))
        {
            /* This happens when we first open the b-tree, we might not have all the node data on hand */
            /* The "+ 14" skips the node descriptor: the BTHeaderRec starts
             * 14 bytes into a header node.  Compare nodeSize in both byte
             * orders against the valid-byte count we actually have. */
            if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
                (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != bp->uValidBytes) &&
                (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != bp->uValidBytes)) {

                /*
                 * Don't swap the node descriptor, record offsets, or other records.
                 * This record will be invalidated and re-read with the correct node
                 * size once the B-tree control block is set up with the node size
                 * from the header record.
                 */
                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
            }
            else
            {
                /*
                 * In this case, we have enough data in-hand to do basic validation
                 * on the B-Tree node.
                 */
                if (block->blockReadFromDisk)
                {
                    /*
                     * The node was just read from disk, so always swap/check it.
                     * This is necessary on big endian since the test below won't trigger.
                     */
                    retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
                }
                else {
                    /*
                     * Block wasn't read from disk; it was found in the cache.
                     * The last two bytes of a node hold the offset of the first
                     * record, which is sizeof(BTNodeDescriptor) == 14 == 0x000e;
                     * 0x0e00 is its byte-swapped (big-endian-on-disk) form.
                     */
                    if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
                        /*
                         * The node was left in the cache in non-native order, so swap it.
                         * This only happens on little endian, after the node is written
                         * back to disk.
                         */
                        retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
                    }
                    else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) {
                        /*
                         * The node was in-cache in native-endianness. We don't need to do
                         * anything here, because the node is ready to use. Set retval == 0.
                         */
                        retval = 0;
                    }
                    /*
                     * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer,
                     * it doesn't necessarily mean that this is a bad node.  Zeroed nodes that are
                     * marked as unused in the b-tree map node would be OK and not have valid content.
                     */
                }
            }
        }
    }

    if (retval) {
        /* Error path: drop the buffer and scrub the descriptor so the
         * caller cannot use a half-initialized block. */
        if (bp) {
            lf_hfs_generic_buf_release(bp);
        }
        block->blockHeader = NULL;
        block->buffer = NULL;
    }

    return (retval);
}
181 | ||
182 | ||
183 | void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) | |
184 | { | |
185 | struct hfsmount *hfsmp = VTOHFS(vp); | |
186 | GenericLFBuf *bp = NULL; | |
187 | ||
188 | if (hfsmp->jnl == NULL) { | |
189 | return; | |
190 | } | |
191 | ||
192 | bp = (GenericLFBuf *) blockPtr->blockHeader; | |
193 | ||
194 | if (bp == NULL) { | |
195 | LFHFS_LOG(LEVEL_ERROR, "ModifyBlockStart: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); | |
196 | hfs_assert(0); | |
197 | return; | |
198 | } | |
199 | ||
200 | journal_modify_block_start(hfsmp->jnl, bp); | |
201 | blockPtr->isModified = 1; | |
202 | } | |
203 | ||
204 | void | |
205 | btree_swap_node(GenericLFBuf *bp, __unused void *arg) | |
206 | { | |
207 | lf_hfs_generic_buf_lock(bp); | |
208 | ||
209 | if (!(bp->uCacheFlags & GEN_BUF_LITTLE_ENDIAN)) { | |
210 | goto exit; | |
211 | } | |
212 | ||
213 | // struct hfsmount *hfsmp = (struct hfsmount *)arg; | |
214 | int retval; | |
215 | struct vnode *vp = bp->psVnode; | |
216 | BlockDescriptor block; | |
217 | ||
218 | /* Prepare the block pointer */ | |
219 | block.blockHeader = bp; | |
220 | block.buffer = bp->pvData; | |
221 | block.blockNum = bp->uBlockN; | |
222 | block.blockReadFromDisk = !(bp->uCacheFlags & GEN_BUF_LITTLE_ENDIAN); | |
223 | block.blockSize = bp->uDataSize; | |
224 | ||
225 | /* Swap the data now that this node is ready to go to disk. | |
226 | * We allow swapping of zeroed out nodes here because we might | |
227 | * be writing node whose last record just got deleted. | |
228 | */ | |
229 | retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true); | |
230 | if (retval) | |
231 | { | |
232 | LFHFS_LOG(LEVEL_ERROR, "btree_swap_node: btree_swap_node: about to write corrupt node!\n"); | |
233 | hfs_assert(0); | |
234 | } | |
235 | exit: | |
236 | lf_hfs_generic_buf_unlock(bp); | |
237 | } | |
238 | ||
239 | ||
/*
 * Close out a journaled modification of a B-tree block.
 * btree_swap_node is installed as the journal's callback so the node can
 * be swapped back to big-endian when the journal flushes the block
 * (see btree_swap_node, which performs the host-to-big swap).
 */
static int
btree_journal_modify_block_end(struct hfsmount *hfsmp, GenericLFBuf *bp)
{
    return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp);
}
245 | ||
/*
 * ReleaseBTreeBlock
 *
 * Release a node buffer previously obtained via GetBTreeBlock().
 * Behavior by option:
 *   kTrashBlock       - discard the contents (journal_kill_block on a
 *                       journaled, write-locked buffer, otherwise invalidate).
 *   kForceWriteBlock  - write it out now (via the journal when present,
 *                       otherwise swap to big-endian and write directly).
 *   kMarkBlockDirty   - same write paths as force-write; on journaled
 *                       volumes this ends the journal entry started by
 *                       ModifyBlockStart().
 *   (none)            - plain release; a pending journal modification is
 *                       ended rather than aborted (see XXXdbg note below).
 * In every branch blockPtr->blockHeader is cleared so the consumed buffer
 * cannot be reused by the caller.
 */
OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
{

    OSStatus retval = E_NONE;
    GenericLFBufPtr bp = NULL;
    struct hfsmount *hfsmp = VTOHFS(vp);

    bp = (GenericLFBufPtr) blockPtr->blockHeader;

    if (bp == NULL) {
        retval = -1;
        goto exit;
    }

    if (options & kTrashBlock) {
        if (hfsmp->jnl && (bp->uCacheFlags & GEN_BUF_WRITE_LOCK))
        {
            /* Buffer belongs to an active journal transaction: let the
             * journal take it out of circulation. */
            journal_kill_block(hfsmp->jnl, bp);
        }
        else
        {
            lf_hfs_generic_buf_invalidate(bp);
        }

        /* Don't let anyone else try to use this bp, it's been consumed */
        blockPtr->blockHeader = NULL;

    } else {
        if (options & kForceWriteBlock) {

            if (hfsmp->jnl)
            {
                /* Force-write only makes sense after ModifyBlockStart(). */
                if (blockPtr->isModified == 0) {
                    LFHFS_LOG(LEVEL_ERROR, "releaseblock: modified is 0 but forcewrite set! bp %p\n", bp);
                    hfs_assert(0);
                }

                retval = btree_journal_modify_block_end(hfsmp, bp);
                blockPtr->isModified = 0;
            }
            else
            {
                /* No journal: swap to on-disk order and write synchronously. */
                btree_swap_node(bp, NULL);
                retval = lf_hfs_generic_buf_write(bp);
                lf_hfs_generic_buf_release(bp);
            }

            /* Don't let anyone else try to use this bp, it's been consumed */
            blockPtr->blockHeader = NULL;

        } else if (options & kMarkBlockDirty) {
            struct timeval tv;
            microuptime(&tv);
            /* NOTE(review): tv is captured but never used and this
             * kLockTransaction body is empty — looks like vestigial code
             * carried over from the kernel implementation; confirm before
             * removing. */
            if ( (options & kLockTransaction)
                && hfsmp->jnl == NULL
                )
            {
            }
            if (hfsmp->jnl)
            {
                if (blockPtr->isModified == 0) {
                    LFHFS_LOG(LEVEL_ERROR, "releaseblock: modified is 0 but markdirty set! bp %p\n", bp);
                    hfs_assert(0);
                }
                retval = btree_journal_modify_block_end(hfsmp, bp);
                blockPtr->isModified = 0;
            }
            else
            {
                btree_swap_node(bp, NULL);
                retval = lf_hfs_generic_buf_write(bp);
                lf_hfs_generic_buf_release(bp);

                if ( retval != 0) {
                    blockPtr->blockHeader = NULL;
                    goto exit;
                }
            }

            /* Don't let anyone else try to use this bp, it's been consumed */
            blockPtr->blockHeader = NULL;

        } else {
            btree_swap_node(bp, NULL);

            // check if we had previously called journal_modify_block_start()
            // on this block and if so, abort it (which will call buf_brelse()).
            if (hfsmp->jnl && blockPtr->isModified) {
                // XXXdbg - I don't want to call modify_block_abort()
                //          because I think it may be screwing up the
                //          journal and blowing away a block that has
                //          valid data in it.
                //
                //    journal_modify_block_abort(hfsmp->jnl, bp);
                //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
                btree_journal_modify_block_end(hfsmp, bp);
                blockPtr->isModified = 0;
            }
            else
            {
                lf_hfs_generic_buf_release(bp);    /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
            }

            /* Don't let anyone else try to use this bp, it's been consumed */
            blockPtr->blockHeader = NULL;
        }
    }

exit:
    return (retval);
}
357 | ||
358 | ||
/*
 * ExtendBTreeFile
 *
 * Grow a B-tree file so its EOF reaches at least minEOF bytes (maxEOF is
 * unused).  Space is allocated in contiguous chunks that are an integer
 * multiple of the tree's node size; a misaligned or insufficient tail is
 * trimmed back, and the newly added region is zeroed on disk via
 * ClearBTNodes().  Takes the allocation-bitmap (and, for non-extents
 * trees, the extents-overflow) system file locks internally.
 *
 * Returns 0 on success, -1 if the file already meets minEOF, dskFulErr
 * when not enough contiguous space could be found, or an error from the
 * allocation/truncation/zeroing helpers.
 */
OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
{
#pragma unused (maxEOF)

    OSStatus retval = 0, ret = 0;
    int64_t actualBytesAdded, origSize;
    u_int64_t bytesToAdd;
    u_int32_t startAllocation;
    u_int32_t fileblocks;
    BTreeInfoRec btInfo;
    ExtendedVCB *vcb;
    FCB *filePtr;
    int64_t trim = 0;
    int lockflags = 0;

    filePtr = GetFileControlBlock(vp);

    if ( (off_t)minEOF > filePtr->fcbEOF )
    {
        bytesToAdd = minEOF - filePtr->fcbEOF;

        /* Grow by at least one clump. */
        if (bytesToAdd < filePtr->ff_clumpsize)
            bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
    }
    else
    {
        /* Already at or beyond the requested EOF: nothing to extend. */
        return -1;
    }

    vcb = VTOVCB(vp);

    /*
     * The Extents B-tree can't have overflow extents. ExtendFileC will
     * return an error if an attempt is made to extend the Extents B-tree
     * when the resident extents are exhausted.
     */

    /* Protect allocation bitmap and extents overflow file. */
    lockflags = SFL_BITMAP;
    if (VTOC(vp)->c_fileid != kHFSExtentsFileID)
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK);

    (void) BTGetInformation(filePtr, 0, &btInfo);

#if 0 // XXXdbg
    /*
     * The b-tree code expects nodes to be contiguous. So when
     * the allocation block size is less than the b-tree node
     * size, we need to force disk allocations to be contiguous.
     */
    if (vcb->blockSize >= btInfo.nodeSize) {
        extendFlags = 0;
    } else {
        /* Ensure that all b-tree nodes are contiguous on disk */
        extendFlags = kEFContigMask;
    }
#endif

    /* Remember where we started so the tail can be trimmed back and the
     * roving allocator advanced correctly afterwards. */
    origSize = filePtr->fcbEOF;
    fileblocks = filePtr->ff_blocks;
    startAllocation = vcb->nextAllocation;

    // loop trying to get a contiguous chunk that's an integer multiple
    // of the btree node size.  if we can't get a contiguous chunk that
    // is at least the node size then we break out of the loop and let
    // the error propagate back up.
    while((off_t)bytesToAdd >= btInfo.nodeSize) {
        do {
            retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
                                 kEFContigMask | kEFMetadataMask | kEFNoClumpMask,
                                 (int64_t *)&actualBytesAdded);
            if (retval == dskFulErr && actualBytesAdded == 0) {
                /* Halve the request and retry, keeping it node-aligned. */
                bytesToAdd >>= 1;
                if (bytesToAdd < btInfo.nodeSize) {
                    break;
                } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
                    // make sure it's an integer multiple of the nodeSize
                    bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
                }
            }
        } while (retval == dskFulErr && actualBytesAdded == 0);

        if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) {
            break;
        }

        /* Recompute EOF from the block count and see how much is still missing. */
        filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
        bytesToAdd = minEOF - filePtr->fcbEOF;
    }

    /*
     * If a new extent was added then move the roving allocator
     * reference forward by the current b-tree file size so
     * there's plenty of room to grow.
     */
    if ((retval == 0) &&
        ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
        (vcb->nextAllocation > startAllocation) &&
        ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) {
        HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks);
    }

    filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;

    // XXXdbg ExtendFileC() could have returned an error even though
    // it grew the file to be big enough for our needs.  If this is
    // the case, we don't care about retval so we blow it away.
    //
    if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) {
        retval = 0;
    }

    // XXXdbg if the file grew but isn't large enough or isn't an
    // even multiple of the nodeSize then trim things back.  if
    // the file isn't large enough we trim back to the original
    // size.  otherwise we trim back to be an even multiple of the
    // btree node size.
    //
    if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) {

        if (filePtr->fcbEOF < (off_t)minEOF) {
            retval = dskFulErr;

            if (filePtr->fcbEOF < origSize) {
                LFHFS_LOG(LEVEL_ERROR, "ExtendBTreeFile: btree file eof %lld less than orig size %lld!\n",
                          filePtr->fcbEOF, origSize);
                hfs_assert(0);
            }

            trim = filePtr->fcbEOF - origSize;
        } else {
            trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
        }

        ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
        filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;

        // XXXdbg - assert if the file didn't get trimmed back properly
        if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
            LFHFS_LOG(LEVEL_ERROR, "ExtendBTreeFile: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n",
                      filePtr->fcbEOF, btInfo.nodeSize, filePtr);
            hfs_assert(0);
        }

        if (ret)
        {
            LFHFS_LOG(LEVEL_ERROR, "ExtendBTreeFile: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
                      filePtr->fcbEOF, trim, (long)ret);
            goto out;
        }
    }

    if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
        /*
         * Get any extents overflow b-tree changes to disk ASAP!
         */
        (void) BTFlushPath(VTOF(vcb->extentsRefNum));
        (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0);
    }
    hfs_systemfile_unlock(vcb, lockflags);
    lockflags = 0;

    if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
        LFHFS_LOG(LEVEL_ERROR, "extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
                  filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
        hfs_assert(0);
    }

    /*
     * Update the Alternate MDB or Alternate VolumeHeader
     */
    VTOC(vp)->c_flag |= C_MODIFIED;
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID)
        ) {
        /* System B-tree files are recorded in the volume header. */
        MarkVCBDirty( vcb );
        (void) hfs_flushvolumeheader(VCBTOHFS(vcb), HFS_FVH_WRITE_ALT);
    } else {
        VTOC(vp)->c_touch_chgtime = TRUE;
        VTOC(vp)->c_touch_modtime = TRUE;

        (void) hfs_update(vp, 0);
    }

    /* Zero the newly added region so stale disk data can't masquerade as nodes. */
    ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
out:
    if (retval == 0)
        retval = ret;

    if (lockflags)
        hfs_systemfile_unlock(vcb, lockflags);

    return retval;
}
555 | ||
556 | ||
557 | /* | |
558 | * Clear out (zero) new b-tree nodes on disk. | |
559 | */ | |
560 | static int | |
561 | ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount) | |
562 | { | |
563 | GenericLFBufPtr bp = NULL; | |
564 | daddr64_t blk; | |
565 | daddr64_t blkcnt; | |
566 | ||
567 | blk = offset / blksize; | |
568 | blkcnt = amount / blksize; | |
569 | ||
570 | while (blkcnt > 0) { | |
571 | ||
572 | bp = lf_hfs_generic_buf_allocate(vp, blk, blksize, GEN_BUF_NON_CACHED); | |
573 | if (bp == NULL) | |
574 | continue; | |
575 | ||
576 | // XXXdbg -- skipping the journal since it makes a transaction | |
577 | // become *way* too large | |
578 | lf_hfs_generic_buf_write(bp); | |
579 | lf_hfs_generic_buf_release(bp); | |
580 | ||
581 | --blkcnt; | |
582 | ++blk; | |
583 | } | |
584 | ||
585 | return (0); | |
586 | } | |
587 | ||
588 | ||
589 | extern char hfs_attrname[]; | |
590 | ||
/*
 * Create an HFS+ Attribute B-tree File.
 *
 * No global resources should be held.
 *
 * Serializes against concurrent creators with the HFS_CREATING_BTREE
 * mount flag, allocates and initializes the attribute B-tree's vnode,
 * control block, on-disk header node and map nodes, then publishes the
 * vnode in the mount structure.  On error the control-block pointer is
 * scrubbed from the file fork to avoid a use-after-free at unmount.
 */
int
hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt)
{
    struct vnode* vp = NULL;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    BlockDescriptor blkdesc;
    BTNodeDescriptor *ndp;
    BTHeaderRec *bthp;
    BTreeControlBlockPtr btcb = NULL;
    GenericLFBufPtr bp = NULL;
    void * buffer;
    u_int8_t *bitmap;
    u_int16_t *index;
    u_int32_t node_num, num_map_nodes;
    u_int32_t bytes_per_map_record;
    u_int32_t temp;
    u_int16_t offset;
    int intrans = 0;
    int result;
    int newvnode_flags = 0;

again:
    /*
     * Serialize creation using HFS_CREATING_BTREE flag.
     */
    hfs_lock_mount (hfsmp);
    if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
        /* Someone else beat us, wait for them to finish. */
        hfs_unlock_mount (hfsmp);
        usleep( 100 );
        if (hfsmp->hfs_attribute_vp) {
            /* The other creator succeeded; nothing left to do. */
            return (0);
        }
        goto again;
    }
    hfsmp->hfs_flags |= HFS_CREATING_BTREE;
    hfs_unlock_mount (hfsmp);

    /* Check if were out of usable disk space. */
    if ((hfs_freeblks(hfsmp, 1) == 0)) {
        result = ENOSPC;
        goto exit;
    }

    /*
     * Set up Attribute B-tree vnode
     * (this must be done before we start a transaction
     * or take any system file locks)
     */
    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname;
    cndesc.cd_namelen = strlen(hfs_attrname);
    cndesc.cd_cnid = kHFSAttributesFileID;

    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    cnattr.ca_fileid = cndesc.cd_cnid;

    bzero(&cfork, sizeof(cfork));
    cfork.cf_clump = nodesize * nodecnt;

    result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
                             &cfork, &vp, &newvnode_flags);
    if (result) {
        goto exit;
    }
    /*
     * Set up Attribute B-tree control block
     */
    btcb = hfs_mallocz(sizeof(*btcb));

    btcb->nodeSize = nodesize;
    btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
    btcb->btreeType = 0xFF;
    btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
    btcb->version = kBTreeVersion;
    btcb->writeCount = 1;
    btcb->flags = 0;  /* kBTHeaderDirty */
    btcb->fileRefNum = vp;
    btcb->getBlockProc = GetBTreeBlock;
    btcb->releaseBlockProc = ReleaseBTreeBlock;
    btcb->setEndOfForkProc = ExtendBTreeFile;
    btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare;

    /*
     * NOTE: We must make sure to zero out this pointer if we error out in this function!
     * If we don't, then unmount will treat it as a valid pointer which can lead to a
     * use-after-free
     */
    VTOF(vp)->fcbBTCBPtr = btcb;

    /*
     * Allocate some space
     */
    if (hfs_start_transaction(hfsmp) != 0) {
        result = EINVAL;
        goto exit;
    }
    intrans = 1;

    /* Note ExtendBTreeFile will acquire the necessary system file locks. */
    result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump);
    if (result)
        goto exit;

    btcb->totalNodes = (u_int32_t)(VTOF(vp)->ff_size) / nodesize;

    /*
     * Figure out how many map nodes we'll need.
     *
     * bytes_per_map_record = the number of bytes in the map record of a
     * map node.  Since that is the only record in the node, it is the size
     * of the node minus the node descriptor at the start, and two record
     * offsets at the end of the node.  The "- 2" is to round the size down
     * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
     * multiple of 4).
     *
     * The value "temp" here is the number of *bits* in the map record of
     * the header node.
     */
    bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
    temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
                - sizeof(BTHeaderRec)
                - kBTreeHeaderUserBytes
                - 4 * sizeof(u_int16_t));
    if (btcb->totalNodes > temp) {
        num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
    }
    else {
        num_map_nodes = 0;
    }

    /* All nodes minus the header node and the map nodes are free. */
    btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;

    /*
     * Initialize the b-tree header on disk
     */
    bp = lf_hfs_generic_buf_allocate(vp, 0, btcb->nodeSize, 0);
    if (bp == NULL) {
        result = EIO;
        goto exit;
    }

    buffer = bp->pvData;
    blkdesc.buffer = buffer;
    blkdesc.blockHeader = (void *)bp;
    blkdesc.blockReadFromDisk = 0;
    blkdesc.isModified = 0;

    /* Open a journal entry for this block (no-op without a journal). */
    ModifyBlockStart(vp, &blkdesc);

    if (bp->uDataSize != nodesize)
    {
        LFHFS_LOG(LEVEL_ERROR, "hfs_create_attr_btree: bad buffer size (%u)\n", bp->uDataSize);
        hfs_assert(0);
    }

    bzero(buffer, nodesize);
    /* Record offsets are u_int16_t slots filled from the END of the node:
     * index[(nodesize/2) - k] is the k-th record offset. */
    index = (u_int16_t *)buffer;

    /* FILL IN THE NODE DESCRIPTOR: */
    ndp = (BTNodeDescriptor *)buffer;
    if (num_map_nodes != 0)
        ndp->fLink = 1;
    ndp->kind = kBTHeaderNode;
    ndp->numRecords = 3;
    offset = sizeof(BTNodeDescriptor);
    index[(nodesize / 2) - 1] = offset;

    /* FILL IN THE HEADER RECORD: */
    bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
    bthp->nodeSize = nodesize;
    bthp->totalNodes = btcb->totalNodes;
    bthp->freeNodes = btcb->freeNodes;
    bthp->clumpSize = cfork.cf_clump;
    bthp->btreeType = 0xFF;
    bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
    bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
    bthp->keyCompareType = kHFSBinaryCompare;
    offset += sizeof(BTHeaderRec);
    index[(nodesize / 2) - 2] = offset;

    /* FILL IN THE USER RECORD: */
    offset += kBTreeHeaderUserBytes;
    index[(nodesize / 2) - 3] = offset;

    /* Mark the header node and map nodes in use in the map record.
     *
     * NOTE: Assumes that the header node's map record has at least
     * (num_map_nodes + 1) bits.
     */
    bitmap = (u_int8_t *) buffer + offset;
    temp = num_map_nodes + 1;  /* +1 for the header node */
    while (temp >= 8) {
        *(bitmap++) = 0xFF;
        temp -= 8;
    }
    /* Set the remaining (temp) high-order bits of the last bitmap byte. */
    *bitmap = ~(0xFF >> temp);

    offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
              - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
    index[(nodesize / 2) - 4] = offset;

    if (hfsmp->jnl)
    {
        result = btree_journal_modify_block_end(hfsmp, bp);
    }
    else
    {
        result = lf_hfs_generic_buf_write(bp);
        lf_hfs_generic_buf_release(bp);
    }
    if (result)
        goto exit;

    /* Create the map nodes: node numbers 1 .. num_map_nodes */
    for (node_num=1; node_num <= num_map_nodes; ++node_num) {
        bp = lf_hfs_generic_buf_allocate(vp, node_num, btcb->nodeSize, 0);
        if (bp == NULL) {
            result = EIO;
            goto exit;
        }
        buffer = (void *)bp->pvData;
        blkdesc.buffer = buffer;
        blkdesc.blockHeader = (void *)bp;
        blkdesc.blockReadFromDisk = 0;
        blkdesc.isModified = 0;

        ModifyBlockStart(vp, &blkdesc);

        bzero(buffer, nodesize);
        index = (u_int16_t *)buffer;

        /* Fill in the node descriptor */
        ndp = (BTNodeDescriptor *)buffer;
        if (node_num != num_map_nodes)
            ndp->fLink = node_num + 1;  /* chain to the next map node */
        ndp->kind = kBTMapNode;
        ndp->numRecords = 1;
        offset = sizeof(BTNodeDescriptor);
        index[(nodesize / 2) - 1] = offset;


        /* Fill in the map record's offset */
        /* Note: We assume that the map record is all zeroes */
        offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
        index[(nodesize / 2) - 2] = offset;

        if (hfsmp->jnl)
        {
            result = btree_journal_modify_block_end(hfsmp, bp);
        }
        else
        {
            result = lf_hfs_generic_buf_write(bp);
            lf_hfs_generic_buf_release(bp);
        }
        if (result)
            goto exit;
    }

    /* Update vp/cp for attribute btree */
    hfs_lock_mount (hfsmp);
    hfsmp->hfs_attribute_cp = VTOC(vp);
    hfsmp->hfs_attribute_vp = vp;
    hfs_unlock_mount (hfsmp);

    (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WRITE_ALT);

    if (intrans) {
        hfs_end_transaction(hfsmp);
        intrans = 0;
    }

    /* Initialize the vnode for virtual attribute data file */
    result = init_attrdata_vnode(hfsmp);
    if (result) {
        LFHFS_LOG(LEVEL_ERROR , "hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result);
    }

exit:

    if (vp && result) {
        /*
         * If we're about to error out, then make sure to zero out the B-Tree control block pointer
         * from the filefork of the EA B-Tree cnode/vnode. Failing to do this will lead to a use
         * after free at unmount or BTFlushPath. Since we're about to error out anyway, this memory
         * will be freed.
         */
        VTOF(vp)->fcbBTCBPtr = NULL;
    }


    if (vp) {
        hfs_unlock(VTOC(vp));
    }
    if (result) {
        hfs_free(btcb);
        if (vp) {
            hfs_vnop_reclaim(vp);
        }
        /* XXX need to give back blocks ? */
    }
    if (intrans) {
        hfs_end_transaction(hfsmp);
    }

    /*
     * All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
     */
    hfs_lock_mount (hfsmp);
    hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
    hfs_unlock_mount (hfsmp);

    return (result);
}