git.saurik.com Git - apple/hfs.git/blob - core/hfs_btreeio.c
hfs-407.30.1.tar.gz
[apple/hfs.git] / core / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/kernel.h>
33 #include <sys/malloc.h>
34 #include <sys/mount.h>
35 #include <sys/vnode.h>
36
37
38 #include "hfs.h"
39 #include "hfs_cnode.h"
40 #include "hfs_dbg.h"
41 #include "hfs_endian.h"
42 #include "hfs_btreeio.h"
43
44 #include "FileMgrInternal.h"
45 #include "BTreesPrivate.h"
46
47 /* From bsd/vfs/vfs_bio.c */
48 extern int bdwrite_internal(struct buf *, int);
49
50 static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount);
51 static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp);
52
53 void btree_swap_node(struct buf *bp, __unused void *arg);
54
55 /*
56 * Return btree node size for given vnode.
57 *
58 * Returns:
59 * For btree vnode, returns btree node size.
60 * For non-btree vnodes, returns 0.
61 */
62 u_int16_t get_btree_nodesize(struct vnode *vp)
63 {
64 BTreeControlBlockPtr btree;
65 u_int16_t node_size = 0;
66
67 if (vnode_issystem(vp)) {
68 btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr;
69 if (btree) {
70 node_size = btree->nodeSize;
71 }
72 }
73
74 return node_size;
75 }
76
77 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
78 {
79 BTreeControlBlockPtr bTreePtr;
80
81 hfs_assert(vp != NULL);
82 hfs_assert(blockSize >= kMinNodeSize);
83 if (blockSize > MAXBSIZE )
84 return (fsBTBadNodeSize);
85
86 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
87 bTreePtr->nodeSize = blockSize;
88
89 return (E_NONE);
90 }
91
92
93 OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
94 {
95 OSStatus retval = E_NONE;
96 struct buf *bp = NULL;
97 u_int8_t allow_empty_node;
98
99 /* If the btree block is being read using hint, it is
100 * fine for the swap code to find zeroed out nodes.
101 */
102 if (options & kGetBlockHint) {
103 allow_empty_node = true;
104 } else {
105 allow_empty_node = false;
106 }
107
108 if (options & kGetEmptyBlock) {
109 daddr64_t blkno;
110 off_t offset;
111
112 offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize;
113 bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META);
114 if (bp && !hfs_vnop_blockmap(&(struct vnop_blockmap_args){
115 .a_vp = vp,
116 .a_foffset = offset,
117 .a_size = block->blockSize,
118 .a_bpn = &blkno
119 })) {
120 buf_setblkno(bp, blkno);
121 }
122 } else {
123 retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp);
124 }
125 if (bp == NULL)
126 retval = -1; //XXX need better error
127
128 if (retval == E_NONE) {
129 block->blockHeader = bp;
130 block->buffer = (char *)buf_dataptr(bp);
131 block->blockNum = buf_lblkno(bp);
132 block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */
133
134 // XXXdbg
135 block->isModified = 0;
136
137 /* Check and endian swap B-Tree node (only if it's a valid block) */
138 if (!(options & kGetEmptyBlock)) {
139
140 /* This happens when we first open the b-tree, we might not have all the node data on hand */
141 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
142 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
143 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
144
145 /*
146 * Don't swap the node descriptor, record offsets, or other records.
147 * This record will be invalidated and re-read with the correct node
148 * size once the B-tree control block is set up with the node size
149 * from the header record.
150 */
151 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
152
153 } else {
154 /*
155 * In this case, we have enough data in-hand to do basic validation
156 * on the B-Tree node.
157 */
158 if (block->blockReadFromDisk) {
159 /*
160 * The node was just read from disk, so always swap/check it.
161 * This is necessary on big endian since the test below won't trigger.
162 */
163 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
164 }
165 else {
166 /*
167 * Block wasn't read from disk; it was found in the cache.
168 */
169 if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
170 /*
171 * The node was left in the cache in non-native order, so swap it.
172 * This only happens on little endian, after the node is written
173 * back to disk.
174 */
175 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
176 }
177 else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) {
178 /*
179 * The node was in-cache in native-endianness. We don't need to do
180 * anything here, because the node is ready to use. Set retval == 0.
181 */
182 retval = 0;
183 }
184 /*
185 * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer,
186 * it doesn't necessarily mean that this is a bad node. Zeroed nodes that are
187 * marked as unused in the b-tree map node would be OK and not have valid content.
188 */
189 }
190 }
191
192 /*
193 * If we got an error, then the node is only partially swapped.
194 * We mark the buffer invalid so that the next attempt to get the
195 * node will read it and attempt to swap again, and will notice
196 * the error again. If we didn't do this, the next attempt to get
197 * the node might use the partially swapped node as-is.
198 */
199 if (retval)
200 buf_markinvalid(bp);
201 }
202 }
203
204 if (retval) {
205 if (bp)
206 buf_brelse(bp);
207 block->blockHeader = NULL;
208 block->buffer = NULL;
209 }
210
211 return (retval);
212 }
213
214
215 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
216 {
217 struct hfsmount *hfsmp = VTOHFS(vp);
218 struct buf *bp = NULL;
219
220 if (hfsmp->jnl == NULL) {
221 return;
222 }
223
224 bp = (struct buf *) blockPtr->blockHeader;
225 if (bp == NULL) {
226 panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
227 return;
228 }
229
230 journal_modify_block_start(hfsmp->jnl, bp);
231 blockPtr->isModified = 1;
232 }
233
234 void
235 btree_swap_node(struct buf *bp, __unused void *arg)
236 {
237 // struct hfsmount *hfsmp = (struct hfsmount *)arg;
238 int retval;
239 struct vnode *vp = buf_vnode(bp);
240 BlockDescriptor block;
241
242 /* Prepare the block pointer */
243 block.blockHeader = bp;
244 block.buffer = (char *)buf_dataptr(bp);
245 block.blockNum = buf_lblkno(bp);
246 /* not found in cache ==> came from disk */
247 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
248 block.blockSize = buf_count(bp);
249
250 /* Swap the data now that this node is ready to go to disk.
251 * We allow swapping of zeroed out nodes here because we might
252 * be writing node whose last record just got deleted.
253 */
254 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
255 if (retval)
256 panic("hfs: btree_swap_node: about to write corrupt node!\n");
257 }
258
259
260 static int
261 btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
262 {
263 return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp);
264 }
265
266
267 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
268 {
269 struct hfsmount *hfsmp = VTOHFS(vp);
270 OSStatus retval = E_NONE;
271 struct buf *bp = NULL;
272
273 bp = (struct buf *) blockPtr->blockHeader;
274
275 if (bp == NULL) {
276 retval = -1;
277 goto exit;
278 }
279
280 if (options & kTrashBlock) {
281 buf_markinvalid(bp);
282
283 if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) {
284 journal_kill_block(hfsmp->jnl, bp);
285 } else {
286 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
287 }
288
289 /* Don't let anyone else try to use this bp, it's been consumed */
290 blockPtr->blockHeader = NULL;
291
292 } else {
293 if (options & kForceWriteBlock) {
294 if (hfsmp->jnl) {
295 if (blockPtr->isModified == 0) {
296 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp);
297 }
298
299 retval = btree_journal_modify_block_end(hfsmp, bp);
300 blockPtr->isModified = 0;
301 } else {
302 retval = VNOP_BWRITE(bp);
303 }
304
305 /* Don't let anyone else try to use this bp, it's been consumed */
306 blockPtr->blockHeader = NULL;
307
308 } else if (options & kMarkBlockDirty) {
309 struct timeval tv;
310 microuptime(&tv);
311 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
312 /*
313 *
314 * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move
315 * the buffer onto the LOCKED free list. This is necessary, otherwise
316 * getnewbuf() would try to reclaim the buffers using buf_bawrite, which
317 * isn't going to work.
318 *
319 */
320 /* Don't hog all the buffers... */
321 if (count_lock_queue() > kMaxLockedMetaBuffers) {
322 hfs_btsync(vp, HFS_SYNCTRANS);
323 /* Rollback sync time to cause a sync on lock release... */
324 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
325 }
326 buf_setflags(bp, B_LOCKED);
327 }
328
329 /*
330 * Delay-write this block.
331 * If the maximum delayed buffers has been exceeded then
332 * free up some buffers and fall back to an asynchronous write.
333 */
334 if (hfsmp->jnl) {
335 if (blockPtr->isModified == 0) {
336 panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp);
337 }
338 retval = btree_journal_modify_block_end(hfsmp, bp);
339 blockPtr->isModified = 0;
340 } else if (bdwrite_internal(bp, 1) != 0) {
341 hfs_btsync(vp, 0);
342 /* Rollback sync time to cause a sync on lock release... */
343 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
344
345 buf_clearflags(bp, B_LOCKED);
346 buf_bawrite(bp);
347 }
348
349 /* Don't let anyone else try to use this bp, it's been consumed */
350 blockPtr->blockHeader = NULL;
351
352 } else {
353 // check if we had previously called journal_modify_block_start()
354 // on this block and if so, abort it (which will call buf_brelse()).
355 if (hfsmp->jnl && blockPtr->isModified) {
356 // XXXdbg - I don't want to call modify_block_abort()
357 // because I think it may be screwing up the
358 // journal and blowing away a block that has
359 // valid data in it.
360 //
361 // journal_modify_block_abort(hfsmp->jnl, bp);
362 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
363 btree_journal_modify_block_end(hfsmp, bp);
364 blockPtr->isModified = 0;
365 } else {
366 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
367 }
368
369 /* Don't let anyone else try to use this bp, it's been consumed */
370 blockPtr->blockHeader = NULL;
371 }
372 }
373
374 exit:
375 return (retval);
376 }
377
378
379 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
380 {
381 #pragma unused (maxEOF)
382
383 OSStatus retval = 0, ret = 0;
384 int64_t actualBytesAdded, origSize;
385 u_int64_t bytesToAdd;
386 u_int32_t startAllocation;
387 u_int32_t fileblocks;
388 BTreeInfoRec btInfo;
389 ExtendedVCB *vcb;
390 FCB *filePtr;
391 struct proc *p = NULL;
392 int64_t trim = 0;
393 int lockflags = 0;
394
395 filePtr = GetFileControlBlock(vp);
396
397 if ( (off_t)minEOF > filePtr->fcbEOF )
398 {
399 bytesToAdd = minEOF - filePtr->fcbEOF;
400
401 if (bytesToAdd < filePtr->ff_clumpsize)
402 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
403 }
404 else
405 {
406 return -1;
407 }
408
409 vcb = VTOVCB(vp);
410
411 /*
412 * The Extents B-tree can't have overflow extents. ExtendFileC will
413 * return an error if an attempt is made to extend the Extents B-tree
414 * when the resident extents are exhausted.
415 */
416
417 /* Protect allocation bitmap and extents overflow file. */
418 lockflags = SFL_BITMAP;
419 if (VTOC(vp)->c_fileid != kHFSExtentsFileID)
420 lockflags |= SFL_EXTENTS;
421 lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK);
422
423 (void) BTGetInformation(filePtr, 0, &btInfo);
424
425 #if 0 // XXXdbg
426 /*
427 * The b-tree code expects nodes to be contiguous. So when
428 * the allocation block size is less than the b-tree node
429 * size, we need to force disk allocations to be contiguous.
430 */
431 if (vcb->blockSize >= btInfo.nodeSize) {
432 extendFlags = 0;
433 } else {
434 /* Ensure that all b-tree nodes are contiguous on disk */
435 extendFlags = kEFContigMask;
436 }
437 #endif
438
439 origSize = filePtr->fcbEOF;
440 fileblocks = filePtr->ff_blocks;
441 startAllocation = vcb->nextAllocation;
442
443 // loop trying to get a contiguous chunk that's an integer multiple
444 // of the btree node size. if we can't get a contiguous chunk that
445 // is at least the node size then we break out of the loop and let
446 // the error propagate back up.
447 while((off_t)bytesToAdd >= btInfo.nodeSize) {
448 do {
449 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
450 kEFContigMask | kEFMetadataMask | kEFNoClumpMask,
451 (int64_t *)&actualBytesAdded);
452 if (retval == dskFulErr && actualBytesAdded == 0) {
453 bytesToAdd >>= 1;
454 if (bytesToAdd < btInfo.nodeSize) {
455 break;
456 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
457 // make sure it's an integer multiple of the nodeSize
458 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
459 }
460 }
461 } while (retval == dskFulErr && actualBytesAdded == 0);
462
463 if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) {
464 break;
465 }
466
467 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
468 bytesToAdd = minEOF - filePtr->fcbEOF;
469 }
470
471 /*
472 * If a new extent was added then move the roving allocator
473 * reference forward by the current b-tree file size so
474 * there's plenty of room to grow.
475 */
476 if ((retval == 0) &&
477 ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
478 (vcb->nextAllocation > startAllocation) &&
479 ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) {
480 HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks);
481 }
482
483 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
484
485 // XXXdbg ExtendFileC() could have returned an error even though
486 // it grew the file to be big enough for our needs. If this is
487 // the case, we don't care about retval so we blow it away.
488 //
489 if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) {
490 retval = 0;
491 }
492
493 // XXXdbg if the file grew but isn't large enough or isn't an
494 // even multiple of the nodeSize then trim things back. if
495 // the file isn't large enough we trim back to the original
496 // size. otherwise we trim back to be an even multiple of the
497 // btree node size.
498 //
499 if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) {
500
501 if (filePtr->fcbEOF < (off_t)minEOF) {
502 retval = dskFulErr;
503
504 if (filePtr->fcbEOF < origSize) {
505 panic("hfs: btree file eof %lld less than orig size %lld!\n",
506 filePtr->fcbEOF, origSize);
507 }
508
509 trim = filePtr->fcbEOF - origSize;
510 } else {
511 trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
512 }
513
514 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
515 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
516
517 // XXXdbg - panic if the file didn't get trimmed back properly
518 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
519 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n",
520 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
521 }
522
523 if (ret) {
524 // XXXdbg - this probably doesn't need to be a panic()
525 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
526 filePtr->fcbEOF, trim, (long)ret);
527 goto out;
528 }
529 }
530
531 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
532 /*
533 * Get any extents overflow b-tree changes to disk ASAP!
534 */
535 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
536 (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p);
537 }
538 hfs_systemfile_unlock(vcb, lockflags);
539 lockflags = 0;
540
541 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
542 panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
543 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
544 }
545
546 /*
547 * Update the Alternate MDB or Alternate VolumeHeader
548 */
549 VTOC(vp)->c_flag |= C_MODIFIED;
550 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
551 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
552 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
553 ) {
554 MarkVCBDirty( vcb );
555 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
556 } else {
557 VTOC(vp)->c_touch_chgtime = TRUE;
558 VTOC(vp)->c_touch_modtime = TRUE;
559 (void) hfs_update(vp, 0);
560 }
561
562 ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
563 out:
564 if (retval == 0)
565 retval = ret;
566
567 if (lockflags)
568 hfs_systemfile_unlock(vcb, lockflags);
569
570 return retval;
571 }
572
573
574 /*
575 * Clear out (zero) new b-tree nodes on disk.
576 */
577 static int
578 ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount)
579 {
580 struct hfsmount *hfsmp = VTOHFS(vp);
581 struct buf *bp = NULL;
582 daddr64_t blk;
583 daddr64_t blkcnt;
584
585 blk = offset / blksize;
586 blkcnt = amount / blksize;
587
588 while (blkcnt > 0) {
589 bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META);
590 if (bp == NULL)
591 continue;
592
593 // XXXdbg
594 if (hfsmp->jnl) {
595 // XXXdbg -- skipping this for now since it makes a transaction
596 // become *way* too large
597 //journal_modify_block_start(hfsmp->jnl, bp);
598 }
599 bzero((char *)buf_dataptr(bp), blksize);
600
601 buf_markaged(bp);
602
603 // XXXdbg
604 if (hfsmp->jnl) {
605 // XXXdbg -- skipping this for now since it makes a transaction
606 // become *way* too large
607 //journal_modify_block_end(hfsmp->jnl, bp);
608
609 // XXXdbg - remove this once we decide what to do with the
610 // writes to the journal
611 if ((blk % 32) == 0)
612 VNOP_BWRITE(bp);
613 else
614 buf_bawrite(bp);
615 } else {
616 /* wait/yield every 32 blocks so we don't hog all the buffers */
617 if ((blk % 32) == 0)
618 VNOP_BWRITE(bp);
619 else
620 buf_bawrite(bp);
621 }
622 --blkcnt;
623 ++blk;
624 }
625
626 return (0);
627 }
628
629
630 extern char hfs_attrname[];
631
632 /*
633 * Create an HFS+ Attribute B-tree File.
634 *
635 * No global resources should be held.
636 */
637 int
638 hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt)
639 {
640 struct vnode* vp = NULLVP;
641 struct cat_desc cndesc;
642 struct cat_attr cnattr;
643 struct cat_fork cfork;
644 BlockDescriptor blkdesc;
645 BTNodeDescriptor *ndp;
646 BTHeaderRec *bthp;
647 BTreeControlBlockPtr btcb = NULL;
648 struct buf *bp = NULL;
649 void * buffer;
650 u_int8_t *bitmap;
651 u_int16_t *index;
652 u_int32_t node_num, num_map_nodes;
653 u_int32_t bytes_per_map_record;
654 u_int32_t temp;
655 u_int16_t offset;
656 int intrans = 0;
657 int result;
658 int newvnode_flags = 0;
659
660 again:
661 /*
662 * Serialize creation using HFS_CREATING_BTREE flag.
663 */
664 hfs_lock_mount (hfsmp);
665 if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
666 /* Someone else beat us, wait for them to finish. */
667 (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
668 PDROP | PINOD, "hfs_create_attr_btree", 0);
669 if (hfsmp->hfs_attribute_vp) {
670 return (0);
671 }
672 goto again;
673 }
674 hfsmp->hfs_flags |= HFS_CREATING_BTREE;
675 hfs_unlock_mount (hfsmp);
676
677 /* Check if were out of usable disk space. */
678 if ((hfs_freeblks(hfsmp, 1) == 0)) {
679 result = ENOSPC;
680 goto exit;
681 }
682
683 /*
684 * Set up Attribute B-tree vnode
685 * (this must be done before we start a transaction
686 * or take any system file locks)
687 */
688 bzero(&cndesc, sizeof(cndesc));
689 cndesc.cd_parentcnid = kHFSRootParentID;
690 cndesc.cd_flags |= CD_ISMETA;
691 cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname;
692 cndesc.cd_namelen = strlen(hfs_attrname);
693 cndesc.cd_cnid = kHFSAttributesFileID;
694
695 bzero(&cnattr, sizeof(cnattr));
696 cnattr.ca_linkcount = 1;
697 cnattr.ca_mode = S_IFREG;
698 cnattr.ca_fileid = cndesc.cd_cnid;
699
700 bzero(&cfork, sizeof(cfork));
701 cfork.cf_clump = nodesize * nodecnt;
702
703 result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
704 &cfork, &vp, &newvnode_flags);
705 if (result) {
706 goto exit;
707 }
708 /*
709 * Set up Attribute B-tree control block
710 */
711 btcb = hfs_mallocz(sizeof(*btcb));
712
713 btcb->nodeSize = nodesize;
714 btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
715 btcb->btreeType = 0xFF;
716 btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
717 btcb->version = kBTreeVersion;
718 btcb->writeCount = 1;
719 btcb->flags = 0; /* kBTHeaderDirty */
720 btcb->fileRefNum = vp;
721 btcb->getBlockProc = GetBTreeBlock;
722 btcb->releaseBlockProc = ReleaseBTreeBlock;
723 btcb->setEndOfForkProc = ExtendBTreeFile;
724 btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare;
725
726 /*
727 * NOTE: We must make sure to zero out this pointer if we error out in this function!
728 * If we don't, then unmount will treat it as a valid pointer which can lead to a
729 * use-after-free
730 */
731 VTOF(vp)->fcbBTCBPtr = btcb;
732
733 /*
734 * Allocate some space
735 */
736 if (hfs_start_transaction(hfsmp) != 0) {
737 result = EINVAL;
738 goto exit;
739 }
740 intrans = 1;
741
742 /* Note ExtendBTreeFile will acquire the necessary system file locks. */
743 result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump);
744 if (result)
745 goto exit;
746
747 btcb->totalNodes = VTOF(vp)->ff_size / nodesize;
748
749 /*
750 * Figure out how many map nodes we'll need.
751 *
752 * bytes_per_map_record = the number of bytes in the map record of a
753 * map node. Since that is the only record in the node, it is the size
754 * of the node minus the node descriptor at the start, and two record
755 * offsets at the end of the node. The "- 2" is to round the size down
756 * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
757 * multiple of 4).
758 *
759 * The value "temp" here is the number of *bits* in the map record of
760 * the header node.
761 */
762 bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
763 temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
764 - sizeof(BTHeaderRec)
765 - kBTreeHeaderUserBytes
766 - 4 * sizeof(u_int16_t));
767 if (btcb->totalNodes > temp) {
768 num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
769 }
770 else {
771 num_map_nodes = 0;
772 }
773
774 btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;
775
776 /*
777 * Initialize the b-tree header on disk
778 */
779 bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META);
780 if (bp == NULL) {
781 result = EIO;
782 goto exit;
783 }
784
785 buffer = (void *)buf_dataptr(bp);
786 blkdesc.buffer = buffer;
787 blkdesc.blockHeader = (void *)bp;
788 blkdesc.blockReadFromDisk = 0;
789 blkdesc.isModified = 0;
790
791 ModifyBlockStart(vp, &blkdesc);
792
793 if (buf_size(bp) != nodesize)
794 panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp));
795
796 bzero(buffer, nodesize);
797 index = (u_int16_t *)buffer;
798
799 /* FILL IN THE NODE DESCRIPTOR: */
800 ndp = (BTNodeDescriptor *)buffer;
801 if (num_map_nodes != 0)
802 ndp->fLink = 1;
803 ndp->kind = kBTHeaderNode;
804 ndp->numRecords = 3;
805 offset = sizeof(BTNodeDescriptor);
806 index[(nodesize / 2) - 1] = offset;
807
808 /* FILL IN THE HEADER RECORD: */
809 bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
810 bthp->nodeSize = nodesize;
811 bthp->totalNodes = btcb->totalNodes;
812 bthp->freeNodes = btcb->freeNodes;
813 bthp->clumpSize = cfork.cf_clump;
814 bthp->btreeType = 0xFF;
815 bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
816 bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
817 bthp->keyCompareType = kHFSBinaryCompare;
818 offset += sizeof(BTHeaderRec);
819 index[(nodesize / 2) - 2] = offset;
820
821 /* FILL IN THE USER RECORD: */
822 offset += kBTreeHeaderUserBytes;
823 index[(nodesize / 2) - 3] = offset;
824
825 /* Mark the header node and map nodes in use in the map record.
826 *
827 * NOTE: Assumes that the header node's map record has at least
828 * (num_map_nodes + 1) bits.
829 */
830 bitmap = (u_int8_t *) buffer + offset;
831 temp = num_map_nodes + 1; /* +1 for the header node */
832 while (temp >= 8) {
833 *(bitmap++) = 0xFF;
834 temp -= 8;
835 }
836 *bitmap = ~(0xFF >> temp);
837
838 offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
839 - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
840 index[(nodesize / 2) - 4] = offset;
841
842 if (hfsmp->jnl) {
843 result = btree_journal_modify_block_end(hfsmp, bp);
844 } else {
845 result = VNOP_BWRITE(bp);
846 }
847 if (result)
848 goto exit;
849
850 /* Create the map nodes: node numbers 1 .. num_map_nodes */
851 for (node_num=1; node_num <= num_map_nodes; ++node_num) {
852 bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META);
853 if (bp == NULL) {
854 result = EIO;
855 goto exit;
856 }
857 buffer = (void *)buf_dataptr(bp);
858 blkdesc.buffer = buffer;
859 blkdesc.blockHeader = (void *)bp;
860 blkdesc.blockReadFromDisk = 0;
861 blkdesc.isModified = 0;
862
863 ModifyBlockStart(vp, &blkdesc);
864
865 bzero(buffer, nodesize);
866 index = (u_int16_t *)buffer;
867
868 /* Fill in the node descriptor */
869 ndp = (BTNodeDescriptor *)buffer;
870 if (node_num != num_map_nodes)
871 ndp->fLink = node_num + 1;
872 ndp->kind = kBTMapNode;
873 ndp->numRecords = 1;
874 offset = sizeof(BTNodeDescriptor);
875 index[(nodesize / 2) - 1] = offset;
876
877
878 /* Fill in the map record's offset */
879 /* Note: We assume that the map record is all zeroes */
880 offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
881 index[(nodesize / 2) - 2] = offset;
882
883 if (hfsmp->jnl) {
884 result = btree_journal_modify_block_end(hfsmp, bp);
885 } else {
886 result = VNOP_BWRITE(bp);
887 }
888 if (result)
889 goto exit;
890 }
891
892 /* Update vp/cp for attribute btree */
893 hfs_lock_mount (hfsmp);
894 hfsmp->hfs_attribute_cp = VTOC(vp);
895 hfsmp->hfs_attribute_vp = vp;
896 hfs_unlock_mount (hfsmp);
897
898 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
899
900 if (intrans) {
901 hfs_end_transaction(hfsmp);
902 intrans = 0;
903 }
904
905 /* Initialize the vnode for virtual attribute data file */
906 result = init_attrdata_vnode(hfsmp);
907 if (result) {
908 printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result);
909 }
910
911 exit:
912
913 if (vp && result) {
914 /*
915 * If we're about to error out, then make sure to zero out the B-Tree control block pointer
916 * from the filefork of the EA B-Tree cnode/vnode. Failing to do this will lead to a use
917 * after free at unmount or BTFlushPath. Since we're about to error out anyway, this memory
918 * will be freed.
919 */
920 VTOF(vp)->fcbBTCBPtr = NULL;
921 }
922
923
924 if (vp) {
925 hfs_unlock(VTOC(vp));
926 }
927 if (result) {
928 hfs_free(btcb, sizeof(*btcb));
929 if (vp) {
930 vnode_put(vp);
931 }
932 /* XXX need to give back blocks ? */
933 }
934 if (intrans) {
935 hfs_end_transaction(hfsmp);
936 }
937
938 /*
939 * All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
940 */
941 hfs_lock_mount (hfsmp);
942 hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
943 wakeup((caddr_t)&hfsmp->hfs_attribute_cp);
944 hfs_unlock_mount (hfsmp);
945
946 return (result);
947 }
948