2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 #include <sys/param.h>
25 #include <sys/systm.h>
27 #include <sys/kernel.h>
28 #include <sys/malloc.h>
29 #include <sys/mount.h>
30 #include <sys/vnode.h>
34 #include "hfs_cnode.h"
36 #include "hfs_endian.h"
38 #include "hfscommon/headers/FileMgrInternal.h"
39 #include "hfscommon/headers/BTreesPrivate.h"
/*
 * Compile-time switch, defined as 0 here.  No use of it is visible in this
 * listing; presumably it forces synchronous b-tree writes -- TODO confirm.
 */
41 #define FORCESYNCBTREEWRITES 0
/*
 * Forward declaration of the file-local helper that zeroes newly added
 * b-tree nodes; ExtendBTreeFile calls it before its definition appears.
 */
44 static int ClearBTNodes(struct vnode
*vp
, long blksize
, off_t offset
, off_t amount
);
/*
 * SetBTreeBlockSize
 *
 * Store a new node size in the B-tree control block attached to vp.
 *
 * vp            - b-tree file; its fork (VTOF(vp)) holds the control block
 *                 pointer in fcbBTCBPtr.  Asserted to be non-NULL.
 * blockSize     - node size to record.  Asserted to be >= kMinNodeSize;
 *                 a value above MAXBSIZE is rejected with fsBTBadNodeSize.
 * minBlockCount - not referenced by any statement visible in this listing.
 *
 * NOTE(review): the braces and the success return are not visible in this
 * listing, so the value returned on success cannot be stated from here.
 */
48 OSStatus
SetBTreeBlockSize(FileReference vp
, ByteCount blockSize
, ItemCount minBlockCount
)
50 BTreeControlBlockPtr bTreePtr
;
52 DBG_ASSERT(vp
!= NULL
);
53 DBG_ASSERT(blockSize
>= kMinNodeSize
);
/* Node sizes above MAXBSIZE are refused before anything is changed. */
54 if (blockSize
> MAXBSIZE
)
55 return (fsBTBadNodeSize
);
/* Fetch the control block hanging off the fork and record the new size. */
57 bTreePtr
= (BTreeControlBlockPtr
)VTOF(vp
)->fcbBTCBPtr
;
58 bTreePtr
->nodeSize
= blockSize
;
/*
 * GetBTreeBlock
 *
 * Obtain the buffer for b-tree node blockNum of vp and describe it to the
 * caller through *block.
 *
 * vp       - b-tree file.
 * blockNum - node number; used as the logical block number of the buffer.
 * options  - kGetEmptyBlock selects buf_getblk instead of buf_meta_bread and
 *            also disables the endian swap step.
 * block    - in: blockSize gives the node size to fetch.
 *            out on success (retval == E_NONE): blockHeader (the struct buf),
 *            buffer (its data pointer), blockNum, blockReadFromDisk (true
 *            when buf_fromcache reports 0) and isModified (cleared).
 *            The visible tail also sets blockHeader and buffer to NULL;
 *            presumably that is the failure branch -- TODO confirm.
 *
 * Endian handling when kGetEmptyBlock is not set, via hfs_swap_BTNode:
 *   - header node whose recorded nodeSize matches buf_count neither as
 *     stored nor after SWAP_BE16: swap with kSwapBTNodeHeaderRecordOnly;
 *   - node that was just read from disk: swap with kSwapBTNodeBigToHost;
 *   - cached node whose final UInt16 equals 0x0e00: swap with
 *     kSwapBTNodeBigToHost.
 * The header record is located at byte offset 14 of the node; presumably
 * that is sizeof(BTNodeDescriptor) -- TODO confirm.
 *
 * NOTE(review): this listing omits some lines (braces, else keywords, the
 * declarations of offset and blkno, the return statement); the inline notes
 * below follow the visible statements only.
 */
65 OSStatus
GetBTreeBlock(FileReference vp
, UInt32 blockNum
, GetBlockOptions options
, BlockDescriptor
*block
)
67 OSStatus retval
= E_NONE
;
68 struct buf
*bp
= NULL
;
/* Empty-block request: obtain a buffer with buf_getblk rather than reading. */
70 if (options
& kGetEmptyBlock
) {
/* Byte offset of the node within the file: node number times node size. */
74 offset
= (daddr64_t
)blockNum
* (daddr64_t
)block
->blockSize
;
75 bp
= buf_getblk(vp
, (daddr64_t
)blockNum
, block
->blockSize
, 0, 0, BLK_META
);
/* If VNOP_BLOCKMAP returns 0 for that offset, store the block number it
 * reported on the buffer. */
77 VNOP_BLOCKMAP(vp
, offset
, block
->blockSize
, &blkno
, NULL
, NULL
, 0, NULL
) == 0) {
78 buf_setblkno(bp
, blkno
);
/* Read path: fetch the node with buf_meta_bread (presumably the branch
 * taken when kGetEmptyBlock is not set). */
81 retval
= buf_meta_bread(vp
, (daddr64_t
)blockNum
, block
->blockSize
, NOCRED
, &bp
);
/* Generic failure code; the condition guarding this assignment is not
 * visible in this listing. */
84 retval
= -1; //XXX need better error
/* On success, describe the buffer to the caller through *block. */
86 if (retval
== E_NONE
) {
87 block
->blockHeader
= bp
;
88 block
->buffer
= (char *)buf_dataptr(bp
);
89 block
->blockNum
= buf_lblkno(bp
);
90 block
->blockReadFromDisk
= (buf_fromcache(bp
) == 0); /* not found in cache ==> came from disk */
/* A freshly fetched block starts out unmodified. */
93 block
->isModified
= 0;
95 /* Check and endian swap B-Tree node (only if it's a valid block) */
96 if (!(options
& kGetEmptyBlock
)) {
97 /* This happens when we first open the b-tree, we might not have all the node data on hand */
98 if ((((BTNodeDescriptor
*)block
->buffer
)->kind
== kBTHeaderNode
) &&
99 (((BTHeaderRec
*)((char *)block
->buffer
+ 14))->nodeSize
!= buf_count(bp
)) &&
100 (SWAP_BE16 (((BTHeaderRec
*)((char *)block
->buffer
+ 14))->nodeSize
) != buf_count(bp
))) {
103 * Don't swap the node descriptor, record offsets, or other records.
104 * This record will be invalidated and re-read with the correct node
105 * size once the B-tree control block is set up with the node size
106 * from the header record.
108 retval
= hfs_swap_BTNode (block
, vp
, kSwapBTNodeHeaderRecordOnly
);
110 } else if (block
->blockReadFromDisk
) {
112 * The node was just read from disk, so always swap/check it.
113 * This is necessary on big endian since the test below won't trigger.
115 retval
= hfs_swap_BTNode (block
, vp
, kSwapBTNodeBigToHost
);
116 } else if (*((UInt16
*)((char *)block
->buffer
+ (block
->blockSize
- sizeof (UInt16
)))) == 0x0e00) {
118 * The node was left in the cache in non-native order, so swap it.
119 * This only happens on little endian, after the node is written
122 retval
= hfs_swap_BTNode (block
, vp
, kSwapBTNodeBigToHost
);
126 * If we got an error, then the node is only partially swapped.
127 * We mark the buffer invalid so that the next attempt to get the
128 * node will read it and attempt to swap again, and will notice
129 * the error again. If we didn't do this, the next attempt to get
130 * the node might use the partially swapped node as-is.
140 block
->blockHeader
= NULL
;
141 block
->buffer
= NULL
;
/*
 * ModifyBlockStart
 *
 * Announce to the journal that a b-tree block is about to be modified.
 *
 * vp       - b-tree file; VTOHFS(vp) yields the mount whose journal is used.
 * blockPtr - descriptor of the block; blockHeader holds the struct buf.
 *
 * Visible behavior: the journal pointer is tested against NULL first
 * (presumably an early return for volumes without a journal -- the body of
 * that test is not visible in this listing).  The buffer is then taken from
 * blockPtr->blockHeader, journal_modify_block_start is called on it and
 * blockPtr->isModified is set to 1.  A panic reports a null buffer; its
 * guarding condition is not visible in this listing.
 *
 * NOTE(review): the panic format prints blockPtr, a pointer, with 0x%x;
 * %p would be the matching conversion.
 */
149 void ModifyBlockStart(FileReference vp
, BlockDescPtr blockPtr
)
151 struct hfsmount
*hfsmp
= VTOHFS(vp
);
152 struct buf
*bp
= NULL
;
/* Journal check comes first; nothing below applies without a journal
 * (presumably -- the body of this test is not visible). */
154 if (hfsmp
->jnl
== NULL
) {
/* The buffer was stored in the descriptor when the block was fetched. */
158 bp
= (struct buf
*) blockPtr
->blockHeader
;
160 panic("ModifyBlockStart: null bp for blockdescptr 0x%x?!?\n", blockPtr
);
/* Open the journal modification and remember that the block is dirty. */
164 journal_modify_block_start(hfsmp
->jnl
, bp
);
165 blockPtr
->isModified
= 1;
/*
 * btree_journal_modify_block_end
 *
 * Finish a journaled modification of a b-tree node: build a temporary
 * BlockDescriptor for bp, convert the node with hfs_swap_BTNode using
 * kSwapBTNodeHostToBig, then pass the buffer to journal_modify_block_end.
 *
 * hfsmp - mount whose journal (hfsmp->jnl) receives the block.
 * bp    - buffer holding the node; its vnode, data pointer, logical block
 *         number and byte count are read to fill in the descriptor.
 *
 * Returns whatever journal_modify_block_end returns.
 *
 * NOTE(review): the return type, the declaration of retval and the
 * condition guarding the panic are not visible in this listing; presumably
 * the panic fires when the swap reports an error -- TODO confirm.
 */
169 btree_journal_modify_block_end(struct hfsmount
*hfsmp
, struct buf
*bp
)
172 struct vnode
*vp
= buf_vnode(bp
);
173 BlockDescriptor block
;
175 /* Prepare the block pointer */
176 block
.blockHeader
= bp
;
177 block
.buffer
= (char *)buf_dataptr(bp
);
178 block
.blockNum
= buf_lblkno(bp
);
179 /* not found in cache ==> came from disk */
180 block
.blockReadFromDisk
= (buf_fromcache(bp
) == 0);
/* The node size is taken from the buffer byte count. */
181 block
.blockSize
= buf_count(bp
);
183 // XXXdbg have to swap the data before it goes in the journal
184 retval
= hfs_swap_BTNode (&block
, vp
, kSwapBTNodeHostToBig
);
186 panic("btree_journal_modify_block_end: about to write corrupt node!\n");
/* Close the journal modification on the converted buffer. */
188 return journal_modify_block_end(hfsmp
->jnl
, bp
);
/*
 * ReleaseBTreeBlock
 *
 * Give back a b-tree block obtained from GetBTreeBlock, disposing of its
 * buffer according to options.
 *
 * vp       - b-tree file; VTOHFS(vp) yields the mount and its journal.
 * blockPtr - descriptor of the block; blockHeader holds the struct buf and
 *            isModified records a prior ModifyBlockStart.
 * options  - tested in this order:
 *   kTrashBlock      - journal_kill_block when the volume has a journal and
 *                      the buffer is B_LOCKED; buf_brelse is also visible
 *                      (presumably the alternative branch).
 *   kForceWriteBlock - btree_journal_modify_block_end followed by clearing
 *                      isModified, or VNOP_BWRITE.  The condition choosing
 *                      between them is not visible; presumably it is the
 *                      presence of a journal.  A panic reports isModified
 *                      being 0 on the journal path.
 *   kMarkBlockDirty  - with kLockTransaction and no journal: when
 *                      count_lock_queue() exceeds kMaxLockedMetaBuffers the
 *                      tree is synced with hfs_btsync and the last-sync time
 *                      is rolled back, then B_LOCKED is set on the buffer.
 *                      Afterwards either the journal end call is made (with
 *                      the same isModified panic) or bdwrite_internal(bp, 1)
 *                      is tried; when that returns non-zero the sync time is
 *                      rolled back and B_LOCKED is cleared.
 *   none of these    - when journaled and isModified is set, the journal end
 *                      call is made and isModified cleared; buf_brelse is
 *                      visible as well.
 *
 * retval starts as E_NONE and takes the result of the journal end call or
 * VNOP_BWRITE where those are assigned; the return statement itself is not
 * visible in this listing.
 *
 * NOTE(review): both panic formats print bp, a pointer, with 0x%x; %p would
 * be the matching conversion.  The declaration of tv used in the
 * BTSetLastSync calls is not visible in this listing.
 */
193 OSStatus
ReleaseBTreeBlock(FileReference vp
, BlockDescPtr blockPtr
, ReleaseBlockOptions options
)
195 struct hfsmount
*hfsmp
= VTOHFS(vp
);
196 extern int bdwrite_internal(struct buf
*, int);
197 OSStatus retval
= E_NONE
;
198 struct buf
*bp
= NULL
;
/* The buffer was stored in the descriptor when the block was fetched. */
200 bp
= (struct buf
*) blockPtr
->blockHeader
;
/* kTrashBlock: a journaled, B_LOCKED buffer is handed to the journal. */
207 if (options
& kTrashBlock
) {
210 if (hfsmp
->jnl
&& (buf_flags(bp
) & B_LOCKED
)) {
211 journal_kill_block(hfsmp
->jnl
, bp
);
213 buf_brelse(bp
); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
/* kForceWriteBlock: journal end call or VNOP_BWRITE. */
216 if (options
& kForceWriteBlock
) {
218 if (blockPtr
->isModified
== 0) {
219 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp 0x%x\n", bp
);
222 retval
= btree_journal_modify_block_end(hfsmp
, bp
);
223 blockPtr
->isModified
= 0;
225 retval
= VNOP_BWRITE(bp
);
/* kMarkBlockDirty: delayed write, with extra handling for kLockTransaction
 * on a volume without a journal. */
227 } else if (options
& kMarkBlockDirty
) {
230 if ((options
& kLockTransaction
) && hfsmp
->jnl
== NULL
) {
233 * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move
234 * the buffer onto the LOCKED free list. This is necessary, otherwise
235 * getnewbuf() would try to reclaim the buffers using buf_bawrite, which
236 * isn't going to work.
239 extern int count_lock_queue(void);
241 /* Don't hog all the buffers... */
242 if (count_lock_queue() > kMaxLockedMetaBuffers
) {
243 hfs_btsync(vp
, HFS_SYNCTRANS
);
244 /* Rollback sync time to cause a sync on lock release... */
245 (void) BTSetLastSync(VTOF(vp
), tv
.tv_sec
- (kMaxSecsForFsync
+ 1));
247 buf_setflags(bp
, B_LOCKED
);
251 * Delay-write this block.
252 * If the maximum delayed buffers has been exceeded then
253 * free up some buffers and fall back to an asynchronous write.
256 if (blockPtr
->isModified
== 0) {
257 panic("hfs: releaseblock: modified is 0 but markdirty set! bp 0x%x\n", bp
);
259 retval
= btree_journal_modify_block_end(hfsmp
, bp
);
260 blockPtr
->isModified
= 0;
261 } else if (bdwrite_internal(bp
, 1) != 0) {
263 /* Rollback sync time to cause a sync on lock release... */
264 (void) BTSetLastSync(VTOF(vp
), tv
.tv_sec
- (kMaxSecsForFsync
+ 1));
266 buf_clearflags(bp
, B_LOCKED
);
270 // check if we had previously called journal_modify_block_start()
271 // on this block and if so, abort it (which will call buf_brelse()).
272 if (hfsmp
->jnl
&& blockPtr
->isModified
) {
273 // XXXdbg - I don't want to call modify_block_abort()
274 // because I think it may be screwing up the
275 // journal and blowing away a block that has
278 // journal_modify_block_abort(hfsmp->jnl, bp);
279 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
280 btree_journal_modify_block_end(hfsmp
, bp
);
281 blockPtr
->isModified
= 0;
283 buf_brelse(bp
); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
/*
 * ExtendBTreeFile
 *
 * Grow the b-tree file vp so that its end of file reaches at least minEOF,
 * keeping the file size a whole number of b-tree nodes, and zero the nodes
 * that were added.
 *
 * vp     - b-tree file to extend.
 * minEOF - smallest acceptable end of file, in bytes.
 * maxEOF - marked unused by the pragma below.
 *
 * Stages, as far as the visible statements show:
 *   1. bytesToAdd = minEOF - fcbEOF when minEOF is beyond the current EOF,
 *      raised to ff_clumpsize when smaller than that.
 *   2. The bitmap lock (plus the extents lock unless vp is the extents
 *      file) is taken exclusively through hfs_systemfile_lock, and the tree
 *      information is fetched with BTGetInformation.
 *   3. ExtendFileC is called in a do/while loop that repeats while it
 *      returns dskFulErr with nothing added; between attempts bytesToAdd is
 *      clamped to at least one node and rounded down to a node multiple.
 *   4. When the metadata zone flag is off and nextAllocation moved forward
 *      without passing totalBlocks, nextAllocation is advanced by the
 *      previous file block count.
 *   5. fcbEOF is recomputed as ff_blocks * blockSize.  A file that is still
 *      too small or grew by a non-multiple of the node size is trimmed with
 *      TruncateFileC; several panics guard that path.
 *   6. Unless vp is the extents file, the extents tree is flushed with
 *      BTFlushPath and hfs_fsync.  The system file lock is released.
 *   7. For the extents, catalog and attributes files the cnode is marked
 *      C_MODIFIED and the volume header is flushed with HFS_ALTFLUSH;
 *      change and modification times are flagged and hfs_update is called.
 *   8. ClearBTNodes zeroes the range that was added.
 *
 * Return value: retval and ret collect the results of the calls above; the
 * return statements are not visible in this listing.
 *
 * NOTE(review): extendFlags is assigned, but the visible ExtendFileC call
 * passes kEFContigMask | kEFMetadataMask directly.  Two panic formats print
 * filePtr, a pointer, with 0x%x; %p would be the matching conversion.
 * Declarations of filePtr, vcb, btInfo, bytesToAdd, lockflags, extendFlags
 * and trim are not visible in this listing.
 */
294 OSStatus
ExtendBTreeFile(FileReference vp
, FSSize minEOF
, FSSize maxEOF
)
296 #pragma unused (maxEOF)
298 OSStatus retval
= 0, ret
= 0;
299 UInt64 actualBytesAdded
, origSize
;
301 u_int32_t startAllocation
;
302 u_int32_t fileblocks
;
306 struct proc
*p
= NULL
;
/* File control block of the b-tree file. */
310 filePtr
= GetFileControlBlock(vp
);
/* Growth needed to reach minEOF from the current end of file. */
312 if ( minEOF
> filePtr
->fcbEOF
)
314 bytesToAdd
= minEOF
- filePtr
->fcbEOF
;
/* Never ask for less than one clump. */
316 if (bytesToAdd
< filePtr
->ff_clumpsize
)
317 bytesToAdd
= filePtr
->ff_clumpsize
; //XXX why not always be a multiple of clump size?
327 * The Extents B-tree can't have overflow extents. ExtendFileC will
328 * return an error if an attempt is made to extend the Extents B-tree
329 * when the resident extents are exhausted.
332 /* Protect allocation bitmap and extents overflow file. */
333 lockflags
= SFL_BITMAP
;
334 if (VTOC(vp
)->c_fileid
!= kHFSExtentsFileID
)
335 lockflags
|= SFL_EXTENTS
;
336 lockflags
= hfs_systemfile_lock(vcb
, lockflags
, HFS_EXCLUSIVE_LOCK
);
338 (void) BTGetInformation(filePtr
, 0, &btInfo
);
342 * The b-tree code expects nodes to be contiguous. So when
343 * the allocation block size is less than the b-tree node
344 * size, we need to force disk allocations to be contiguous.
346 if (vcb
->blockSize
>= btInfo
.nodeSize
) {
349 /* Ensure that all b-tree nodes are contiguous on disk */
350 extendFlags
= kEFContigMask
;
354 origSize
= filePtr
->fcbEOF
;
355 fileblocks
= filePtr
->ff_blocks
;
356 startAllocation
= vcb
->nextAllocation
;
358 // loop trying to get a contiguous chunk that's an integer multiple
359 // of the btree node size. if we can't get a contiguous chunk that
360 // is at least the node size then we break out of the loop and let
361 // the error propagate back up.
363 retval
= ExtendFileC(vcb
, filePtr
, bytesToAdd
, 0,
364 kEFContigMask
| kEFMetadataMask
,
366 if (retval
== dskFulErr
&& actualBytesAdded
== 0) {
368 if (bytesToAdd
== btInfo
.nodeSize
|| bytesToAdd
< (minEOF
- origSize
)) {
369 // if we're here there's nothing else to try, we're out
370 // of space so we break and bail out.
374 if (bytesToAdd
< btInfo
.nodeSize
) {
375 bytesToAdd
= btInfo
.nodeSize
;
376 } else if ((bytesToAdd
% btInfo
.nodeSize
) != 0) {
377 // make sure it's an integer multiple of the nodeSize
378 bytesToAdd
-= (bytesToAdd
% btInfo
.nodeSize
);
382 } while (retval
== dskFulErr
&& actualBytesAdded
== 0);
385 * If a new extent was added then move the roving allocator
386 * reference forward by the current b-tree file size so
387 * there's plenty of room to grow.
390 ((VCBTOHFS(vcb
)->hfs_flags
& HFS_METADATA_ZONE
) == 0) &&
391 (vcb
->nextAllocation
> startAllocation
) &&
392 ((vcb
->nextAllocation
+ fileblocks
) < vcb
->totalBlocks
)) {
393 vcb
->nextAllocation
+= fileblocks
;
396 filePtr
->fcbEOF
= (u_int64_t
)filePtr
->ff_blocks
* (u_int64_t
)vcb
->blockSize
;
398 // XXXdbg ExtendFileC() could have returned an error even though
399 // it grew the file to be big enough for our needs. If this is
400 // the case, we don't care about retval so we blow it away.
402 if (filePtr
->fcbEOF
>= minEOF
&& retval
!= 0) {
406 // XXXdbg if the file grew but isn't large enough or isn't an
407 // even multiple of the nodeSize then trim things back. if
408 // the file isn't large enough we trim back to the original
409 // size. otherwise we trim back to be an even multiple of the
412 if ((filePtr
->fcbEOF
< minEOF
) || (actualBytesAdded
% btInfo
.nodeSize
) != 0) {
414 if (filePtr
->fcbEOF
< minEOF
) {
417 if (filePtr
->fcbEOF
< origSize
) {
418 panic("hfs: btree file eof %lld less than orig size %lld!\n",
419 filePtr
->fcbEOF
, origSize
);
422 trim
= filePtr
->fcbEOF
- origSize
;
423 if (trim
!= actualBytesAdded
) {
424 panic("hfs: trim == %lld but actualBytesAdded == %lld\n",
425 trim
, actualBytesAdded
);
428 trim
= (actualBytesAdded
% btInfo
.nodeSize
);
431 ret
= TruncateFileC(vcb
, filePtr
, filePtr
->fcbEOF
- trim
, 0);
432 filePtr
->fcbEOF
= (u_int64_t
)filePtr
->ff_blocks
* (u_int64_t
)vcb
->blockSize
;
434 // XXXdbg - panic if the file didn't get trimmed back properly
435 if ((filePtr
->fcbEOF
% btInfo
.nodeSize
) != 0) {
436 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb 0x%x\n",
437 filePtr
->fcbEOF
, btInfo
.nodeSize
, filePtr
);
441 // XXXdbg - this probably doesn't need to be a panic()
442 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n",
443 filePtr
->fcbEOF
, trim
, ret
);
446 actualBytesAdded
-= trim
;
449 if(VTOC(vp
)->c_fileid
!= kHFSExtentsFileID
) {
451 * Get any extents overflow b-tree changes to disk ASAP!
453 (void) BTFlushPath(VTOF(vcb
->extentsRefNum
));
454 (void) hfs_fsync(vcb
->extentsRefNum
, MNT_WAIT
, 0, p
);
456 hfs_systemfile_unlock(vcb
, lockflags
);
459 if ((filePtr
->fcbEOF
% btInfo
.nodeSize
) != 0) {
460 panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
461 filePtr
, filePtr
->fcbEOF
, btInfo
.nodeSize
, trim
);
465 * Update the Alternate MDB or Alternate VolumeHeader
467 if ((VTOC(vp
)->c_fileid
== kHFSExtentsFileID
) ||
468 (VTOC(vp
)->c_fileid
== kHFSCatalogFileID
) ||
469 (VTOC(vp
)->c_fileid
== kHFSAttributesFileID
)
471 VTOC(vp
)->c_flag
|= C_MODIFIED
;
473 ret
= hfs_flushvolumeheader(VCBTOHFS(vcb
), MNT_WAIT
, HFS_ALTFLUSH
);
475 VTOC(vp
)->c_touch_chgtime
= TRUE
;
476 VTOC(vp
)->c_touch_modtime
= TRUE
;
477 (void) hfs_update(vp
, TRUE
);
480 ret
= ClearBTNodes(vp
, btInfo
.nodeSize
, filePtr
->fcbEOF
- actualBytesAdded
, actualBytesAdded
);
486 hfs_systemfile_unlock(vcb
, lockflags
);
/*
 * ClearBTNodes
 *
 * Zero a range of b-tree nodes of vp, one node-sized buffer at a time.
 *
 * vp      - b-tree file.
 * blksize - node size in bytes; also the size of each buffer requested.
 * offset  - byte offset of the first node to clear; offset / blksize gives
 *           the starting node number.
 * amount  - number of bytes to clear; amount / blksize gives the node count.
 *
 * Returns int per the prototype earlier in the file; the return statement,
 * the loop over the nodes, the write calls and the declarations of blk and
 * blkcnt are not visible in this listing.
 *
 * The journal start and end calls are present only as commented-out code,
 * with a note that journaling these writes made transactions too large.
 */
493 * Clear out (zero) new b-tree nodes on disk.
496 ClearBTNodes(struct vnode
*vp
, long blksize
, off_t offset
, off_t amount
)
498 struct hfsmount
*hfsmp
= VTOHFS(vp
);
499 struct buf
*bp
= NULL
;
/* Convert the byte range into a starting node number and a node count. */
503 blk
= offset
/ blksize
;
504 blkcnt
= amount
/ blksize
;
/* Get a metadata buffer for the node without reading it from disk
 * (buf_getblk, not a read call). */
507 bp
= buf_getblk(vp
, blk
, blksize
, 0, 0, BLK_META
);
513 // XXXdbg -- skipping this for now since it makes a transaction
514 // become *way* too large
515 //journal_modify_block_start(hfsmp->jnl, bp);
/* Zero the whole node. */
517 bzero((char *)buf_dataptr(bp
), blksize
);
523 // XXXdbg -- skipping this for now since it makes a transaction
524 // become *way* too large
525 //journal_modify_block_end(hfsmp->jnl, bp);
527 // XXXdbg - remove this once we decide what to do with the
528 // writes to the journal
534 /* wait/yield every 32 blocks so we don't hog all the buffers */
/*
 * External symbols used by hfs_create_attr_btree: hfs_attrname supplies the
 * catalog name of the attributes file and hfs_attrkeycompare is installed
 * as the key comparison callback of the new tree.
 */
548 extern char hfs_attrname
[];
550 extern int hfs_attrkeycompare(HFSPlusAttrKey
*searchKey
, HFSPlusAttrKey
*trialKey
);
/* Prototype for the creation routine defined directly below. */
552 int hfs_create_attr_btree(struct hfsmount
*hfsmp
, uint32_t nodesize
, uint32_t nodecnt
);
/*
 * hfs_create_attr_btree
 *
 * Create the HFS+ attributes b-tree file on hfsmp: make its vnode, attach a
 * B-tree control block, allocate space, write the header node and publish
 * the vnode in hfsmp->hfs_attribute_vp.
 *
 * hfsmp    - mounted volume that receives the new file.
 * nodesize - b-tree node size in bytes.
 * nodecnt  - number of nodes used to size the clump (nodesize * nodecnt).
 *
 * Returns int per the prototype; result collects the status of
 * hfs_getnewvnode, ExtendBTreeFile and the final write.  The error checks,
 * the exit label and the return statement are not visible in this listing.
 *
 * Header node layout written here: node descriptor, header record, user
 * record of kBTreeHeaderUserBytes, then a map record whose first byte is
 * 0x80.  The four record offsets are stored as 16-bit values in the last
 * four slots of the node.
 *
 * NOTE(review): declarations of result, buffer, index, offset and bthp are
 * not visible in this listing.
 */
555 * Create an HFS+ Attribute B-tree File.
557 * A journal transaction must be already started.
560 hfs_create_attr_btree(struct hfsmount
*hfsmp
, uint32_t nodesize
, uint32_t nodecnt
)
562 struct vnode
* vp
= NULL
;
563 struct cat_desc cndesc
;
564 struct cat_attr cnattr
;
565 struct cat_fork cfork
;
566 BlockDescriptor blkdesc
;
567 BTNodeDescriptor
*ndp
;
569 BTreeControlBlockPtr btcb
= NULL
;
570 struct buf
*bp
= NULL
;
576 printf("Creating HFS+ Attribute B-tree File (%d nodes) on %s\n", nodecnt
, hfsmp
->vcbVN
);
579 * Set up Attribute B-tree vnode
/* Catalog descriptor: a metadata file named hfs_attrname under the root
 * parent ID, carrying the attributes file ID. */
581 bzero(&cndesc
, sizeof(cndesc
));
582 cndesc
.cd_parentcnid
= kHFSRootParentID
;
583 cndesc
.cd_flags
|= CD_ISMETA
;
584 cndesc
.cd_nameptr
= hfs_attrname
;
585 cndesc
.cd_namelen
= strlen(hfs_attrname
);
586 cndesc
.cd_cnid
= kHFSAttributesFileID
;
/* Catalog attributes: regular file mode and the same file ID. */
588 bzero(&cnattr
, sizeof(cnattr
));
590 cnattr
.ca_mode
= S_IFREG
;
591 cnattr
.ca_fileid
= cndesc
.cd_cnid
;
/* Clump size is nodesize * nodecnt bytes; it is passed as maxEOF to
 * ExtendBTreeFile below and stored in the header record. */
593 bzero(&cfork
, sizeof(cfork
));
594 cfork
.cf_clump
= nodesize
* nodecnt
;
596 result
= hfs_getnewvnode(hfsmp
, NULL
, NULL
, &cndesc
, 0, &cnattr
, &cfork
, &vp
);
601 * Set up Attribute B-tree control block
603 MALLOC(btcb
, BTreeControlBlock
*, sizeof(BTreeControlBlock
), M_TEMP
, M_WAITOK
);
604 bzero(btcb
, sizeof(BTreeControlBlock
));
606 btcb
->nodeSize
= nodesize
;
607 btcb
->maxKeyLength
= kHFSPlusAttrKeyMaximumLength
;
608 btcb
->btreeType
= 0xFF;
609 btcb
->attributes
= kBTVariableIndexKeysMask
| kBTBigKeysMask
;
610 btcb
->version
= kBTreeVersion
;
611 btcb
->writeCount
= 1;
612 btcb
->flags
= 0; /* kBTHeaderDirty */
/* Callbacks for block access, file growth and key comparison are the
 * routines of this file plus hfs_attrkeycompare. */
613 btcb
->fileRefNum
= vp
;
614 btcb
->getBlockProc
= GetBTreeBlock
;
615 btcb
->releaseBlockProc
= ReleaseBTreeBlock
;
616 btcb
->setEndOfForkProc
= ExtendBTreeFile
;
617 btcb
->keyCompareProc
= (KeyCompareProcPtr
)hfs_attrkeycompare
;
/* Attach the control block to the fork of the new vnode. */
618 VTOF(vp
)->fcbBTCBPtr
= btcb
;
621 * Allocate some space
/* Ask for at least one node of space (minEOF = nodesize). */
623 result
= ExtendBTreeFile(vp
, nodesize
, cfork
.cf_clump
);
/* Node 0 becomes the header node, so one node fewer than the total is
 * free. */
627 btcb
->totalNodes
= VTOF(vp
)->ff_size
/ nodesize
;
628 btcb
->freeNodes
= btcb
->totalNodes
- 1;
631 * Initialize the b-tree header on disk
633 bp
= buf_getblk(vp
, 0, nodesize
, 0, 0, BLK_META
);
/* Describe the buffer in a BlockDescriptor so ModifyBlockStart can be
 * applied to it. */
639 buffer
= (void *)buf_dataptr(bp
);
640 blkdesc
.buffer
= buffer
;
641 blkdesc
.blockHeader
= (void *)bp
;
642 blkdesc
.blockReadFromDisk
= 0;
643 blkdesc
.isModified
= 0;
645 ModifyBlockStart(vp
, &blkdesc
);
647 if (buf_size(bp
) != nodesize
)
648 panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp
));
/* Start from an all-zero node; index views it as 16-bit slots so the
 * record offsets can be stored at its end. */
650 bzero(buffer
, nodesize
);
651 index
= (int16_t *)buffer
;
653 /* FILL IN THE NODE DESCRIPTOR: */
654 ndp
= (BTNodeDescriptor
*)buffer
;
655 ndp
->kind
= kBTHeaderNode
;
/* Last 16-bit slot of the node: offset of the header record, which
 * follows the node descriptor. */
657 offset
= sizeof(BTNodeDescriptor
);
658 index
[(nodesize
/ 2) - 1] = offset
;
660 /* FILL IN THE HEADER RECORD: */
661 bthp
= (BTHeaderRec
*)((UInt8
*)buffer
+ offset
);
662 bthp
->nodeSize
= nodesize
;
663 bthp
->totalNodes
= btcb
->totalNodes
;
664 bthp
->freeNodes
= btcb
->freeNodes
;
665 bthp
->clumpSize
= cfork
.cf_clump
;
666 bthp
->btreeType
= 0xFF;
667 bthp
->attributes
= kBTVariableIndexKeysMask
| kBTBigKeysMask
;
668 bthp
->maxKeyLength
= kHFSPlusAttrKeyMaximumLength
;
669 bthp
->keyCompareType
= kHFSBinaryCompare
;
/* Second slot from the end: offset of the user record. */
670 offset
+= sizeof(BTHeaderRec
);
671 index
[(nodesize
/ 2) - 2] = offset
;
673 /* FILL IN THE USER RECORD: */
/* Third slot from the end: offset of the map record. */
674 offset
+= kBTreeHeaderUserBytes
;
675 index
[(nodesize
/ 2) - 3] = offset
;
677 /* FILL IN THE MAP RECORD (only one node in use). */
678 *((u_int8_t
*)buffer
+ offset
) = 0x80;
/* The map record takes everything left after the descriptor, header
 * record, user record and the four offset slots; the fourth slot from the
 * end receives the offset just past it. */
679 offset
+= nodesize
- sizeof(BTNodeDescriptor
) - sizeof(BTHeaderRec
)
680 - kBTreeHeaderUserBytes
- (4 * sizeof(int16_t));
681 index
[(nodesize
/ 2) - 4] = offset
;
/* Hand the node to the journal or write it directly; the condition that
 * selects between the two is not visible in this listing. */
684 result
= btree_journal_modify_block_end(hfsmp
, bp
);
686 result
= VNOP_BWRITE(bp
);
691 /* Publish new btree file */
692 hfsmp
->hfs_attribute_vp
= vp
;
693 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, HFS_ALTFLUSH
);
/* Release the cnode lock of the new vnode. */
696 hfs_unlock(VTOC(vp
));
702 // hfs_truncate(); /* XXX need to give back blocks */