X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c0fea4742e91338fffdcf79f86a7c1d5e2b97eb1..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/bsd/hfs/hfs_btreeio.c diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 3c19d78e2..fefc36ad3 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -1,28 +1,35 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include #include #include +#include #include #include #include @@ -33,18 +40,44 @@ #include "hfs_cnode.h" #include "hfs_dbg.h" #include "hfs_endian.h" +#include "hfs_btreeio.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesPrivate.h" #define FORCESYNCBTREEWRITES 0 +/* From bsd/vfs/vfs_bio.c */ +extern int bdwrite_internal(struct buf *, int); + +static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount); +static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp); + +void btree_swap_node(struct buf *bp, __unused void *arg); + +/* + * Return btree node size for given vnode. + * + * Returns: + * For btree vnode, returns btree node size. + * For non-btree vnodes, returns 0. + */ +u_int16_t get_btree_nodesize(struct vnode *vp) +{ + BTreeControlBlockPtr btree; + u_int16_t node_size = 0; -static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount); + if (vnode_issystem(vp)) { + btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr; + if (btree) { + node_size = btree->nodeSize; + } + } + return node_size; +} -__private_extern__ -OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount) +OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount) { BTreeControlBlockPtr bTreePtr; @@ -60,11 +93,20 @@ OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minB } -__private_extern__ -OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block) +OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block) { OSStatus retval = E_NONE; struct buf *bp = NULL; + u_int8_t allow_empty_node; + + /* If the btree block is being read using hint, it is + * fine for the swap code to find zeroed out nodes. + */ + if (options & kGetBlockHint) { + allow_empty_node = true; + } else { + allow_empty_node = false; + } if (options & kGetEmptyBlock) { daddr64_t blkno; @@ -91,47 +133,72 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option // XXXdbg block->isModified = 0; - /* Check and endian swap B-Tree node (only if it's a valid block) */ - if (!(options & kGetEmptyBlock)) { - /* This happens when we first open the b-tree, we might not have all the node data on hand */ - if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && - (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && - (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { + /* Check and endian swap B-Tree node (only if it's a valid block) */ + if (!(options & kGetEmptyBlock)) { + + /* This happens when we first open the b-tree, we might not have all the node data on hand */ + if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && + (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && + (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { - /* - * Don't swap the node descriptor, record offsets, or other records. - * This record will be invalidated and re-read with the correct node - * size once the B-tree control block is set up with the node size - * from the header record. - */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly); - - } else if (block->blockReadFromDisk) { - /* - * The node was just read from disk, so always swap/check it. - * This is necessary on big endian since the test below won't trigger. - */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); - } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) { /* - * The node was left in the cache in non-native order, so swap it. - * This only happens on little endian, after the node is written - * back to disk. + * Don't swap the node descriptor, record offsets, or other records. + * This record will be invalidated and re-read with the correct node + * size once the B-tree control block is set up with the node size + * from the header record. */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); - } - - /* - * If we got an error, then the node is only partially swapped. - * We mark the buffer invalid so that the next attempt to get the - * node will read it and attempt to swap again, and will notice - * the error again. If we didn't do this, the next attempt to get - * the node might use the partially swapped node as-is. - */ - if (retval) + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node); + + } else { + /* + * In this case, we have enough data in-hand to do basic validation + * on the B-Tree node. + */ + if (block->blockReadFromDisk) { + /* + * The node was just read from disk, so always swap/check it. + * This is necessary on big endian since the test below won't trigger. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else { + /* + * Block wasn't read from disk; it was found in the cache. + */ + if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { + /* + * The node was left in the cache in non-native order, so swap it. + * This only happens on little endian, after the node is written + * back to disk. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) { + /* + * The node was in-cache in native-endianness. We don't need to do + * anything here, because the node is ready to use. Set retval == 0. + */ + retval = 0; + } + /* + * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer, + * it doesn't necessarily mean that this is a bad node. Zeroed nodes that are + * marked as unused in the b-tree map node would be OK and not have valid content. + */ + } + } + + /* + * If we got an error, then the node is only partially swapped. + * We mark the buffer invalid so that the next attempt to get the + * node will read it and attempt to swap again, and will notice + * the error again. If we didn't do this, the next attempt to get + * the node might use the partially swapped node as-is. + */ + if (retval) buf_markinvalid(bp); - } - } + } + } if (retval) { if (bp) @@ -144,7 +211,6 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option } -__private_extern__ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) { struct hfsmount *hfsmp = VTOHFS(vp); @@ -156,7 +222,7 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) bp = (struct buf *) blockPtr->blockHeader; if (bp == NULL) { - panic("ModifyBlockStart: null bp for blockdescptr 0x%x?!?\n", blockPtr); + panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); return; } @@ -164,9 +230,10 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) blockPtr->isModified = 1; } -static int -btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) +void +btree_swap_node(struct buf *bp, __unused void *arg) { + // struct hfsmount *hfsmp = (struct hfsmount *)arg; int retval; struct vnode *vp = buf_vnode(bp); BlockDescriptor block; @@ -179,20 +246,26 @@ btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) block.blockReadFromDisk = (buf_fromcache(bp) == 0); block.blockSize = buf_count(bp); - // XXXdbg have to swap the data before it goes in the journal - retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + /* Swap the data now that this node is ready to go to disk. + * We allow swapping of zeroed out nodes here because we might + * be writing node whose last record just got deleted. + */ + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true); if (retval) - panic("btree_journal_modify_block_end: about to write corrupt node!\n"); + panic("hfs: btree_swap_node: about to write corrupt node!\n"); +} + - return journal_modify_block_end(hfsmp->jnl, bp); +static int +btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) +{ + return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp); } -__private_extern__ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options) { struct hfsmount *hfsmp = VTOHFS(vp); - extern int bdwrite_internal(struct buf *, int); OSStatus retval = E_NONE; struct buf *bp = NULL; @@ -211,11 +284,15 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else { buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } else { if (options & kForceWriteBlock) { if (hfsmp->jnl) { if (blockPtr->isModified == 0) { - panic("hfs: releaseblock: modified is 0 but forcewrite set! bp 0x%x\n", bp); + panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp); } retval = btree_journal_modify_block_end(hfsmp, bp); @@ -223,6 +300,10 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else { retval = VNOP_BWRITE(bp); } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } else if (options & kMarkBlockDirty) { struct timeval tv; microuptime(&tv); @@ -235,8 +316,6 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock * isn't going to work. * */ - extern int count_lock_queue(void); - /* Don't hog all the buffers... */ if (count_lock_queue() > kMaxLockedMetaBuffers) { hfs_btsync(vp, HFS_SYNCTRANS); @@ -253,7 +332,7 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock */ if (hfsmp->jnl) { if (blockPtr->isModified == 0) { - panic("hfs: releaseblock: modified is 0 but markdirty set! bp 0x%x\n", bp); + panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp); } retval = btree_journal_modify_block_end(hfsmp, bp); blockPtr->isModified = 0; @@ -265,6 +344,10 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock buf_clearflags(bp, B_LOCKED); buf_bawrite(bp); } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } else { // check if we had previously called journal_modify_block_start() // on this block and if so, abort it (which will call buf_brelse()). @@ -281,36 +364,36 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else { buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ } - }; - }; + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } + } exit: return (retval); } -#define HFS_CLUMP_ADJ_LIMIT (200*1024*1024) - -__private_extern__ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) { #pragma unused (maxEOF) OSStatus retval = 0, ret = 0; - UInt64 actualBytesAdded, origSize; - UInt64 bytesToAdd; + int64_t actualBytesAdded, origSize; + u_int64_t bytesToAdd; u_int32_t startAllocation; u_int32_t fileblocks; - BTreeInfoRec btInfo; + BTreeInfoRec btInfo; ExtendedVCB *vcb; - FCB *filePtr; - struct proc *p = NULL; - UInt64 trim = 0; - int lockflags = 0; + FCB *filePtr; + struct proc *p = NULL; + int64_t trim = 0; + int lockflags = 0; filePtr = GetFileControlBlock(vp); - if ( minEOF > filePtr->fcbEOF ) + if ( (off_t)minEOF > filePtr->fcbEOF ) { bytesToAdd = minEOF - filePtr->fcbEOF; @@ -323,13 +406,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) } vcb = VTOVCB(vp); - - /* Take past growth into account when extending the catalog file. */ - if ((VTOC(vp)->c_fileid == kHFSCatalogFileID) && - (bytesToAdd / vcb->blockSize) < filePtr->fcbExtents[0].blockCount) { - bytesToAdd = filePtr->fcbExtents[0].blockCount * (UInt64)vcb->blockSize; - bytesToAdd = MIN(bytesToAdd, HFS_CLUMP_ADJ_LIMIT); - } + /* * The Extents B-tree can't have overflow extents. ExtendFileC will * return an error if an attempt is made to extend the Extents B-tree @@ -366,27 +443,29 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) // of the btree node size. if we can't get a contiguous chunk that // is at least the node size then we break out of the loop and let // the error propagate back up. - do { + while((off_t)bytesToAdd >= btInfo.nodeSize) { + do { retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0, - kEFContigMask | kEFMetadataMask, - &actualBytesAdded); + kEFContigMask | kEFMetadataMask | kEFNoClumpMask, + (int64_t *)&actualBytesAdded); if (retval == dskFulErr && actualBytesAdded == 0) { - - if (bytesToAdd == btInfo.nodeSize || bytesToAdd < (minEOF - origSize)) { - // if we're here there's nothing else to try, we're out - // of space so we break and bail out. - break; - } else { - bytesToAdd >>= 1; - if (bytesToAdd < btInfo.nodeSize) { - bytesToAdd = btInfo.nodeSize; - } else if ((bytesToAdd % btInfo.nodeSize) != 0) { - // make sure it's an integer multiple of the nodeSize - bytesToAdd -= (bytesToAdd % btInfo.nodeSize); - } - } + bytesToAdd >>= 1; + if (bytesToAdd < btInfo.nodeSize) { + break; + } else if ((bytesToAdd % btInfo.nodeSize) != 0) { + // make sure it's an integer multiple of the nodeSize + bytesToAdd -= (bytesToAdd % btInfo.nodeSize); + } } - } while (retval == dskFulErr && actualBytesAdded == 0); + } while (retval == dskFulErr && actualBytesAdded == 0); + + if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) { + break; + } + + filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; + bytesToAdd = minEOF - filePtr->fcbEOF; + } /* * If a new extent was added then move the roving allocator @@ -396,8 +475,8 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) if ((retval == 0) && ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) && (vcb->nextAllocation > startAllocation) && - ((vcb->nextAllocation + fileblocks) < vcb->totalBlocks)) { - vcb->nextAllocation += fileblocks; + ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) { + HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks); } filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; @@ -406,7 +485,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) // it grew the file to be big enough for our needs. If this is // the case, we don't care about retval so we blow it away. // - if (filePtr->fcbEOF >= minEOF && retval != 0) { + if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) { retval = 0; } @@ -416,9 +495,9 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) // size. otherwise we trim back to be an even multiple of the // btree node size. // - if ((filePtr->fcbEOF < minEOF) || (actualBytesAdded % btInfo.nodeSize) != 0) { + if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) { - if (filePtr->fcbEOF < minEOF) { + if (filePtr->fcbEOF < (off_t)minEOF) { retval = dskFulErr; if (filePtr->fcbEOF < origSize) { @@ -427,30 +506,25 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) } trim = filePtr->fcbEOF - origSize; - if (trim != actualBytesAdded) { - panic("hfs: trim == %lld but actualBytesAdded == %lld\n", - trim, actualBytesAdded); - } } else { - trim = (actualBytesAdded % btInfo.nodeSize); + trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize); } - ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0); + ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0); filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; // XXXdbg - panic if the file didn't get trimmed back properly if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) { - panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb 0x%x\n", + panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n", filePtr->fcbEOF, btInfo.nodeSize, filePtr); } if (ret) { // XXXdbg - this probably doesn't need to be a panic() - panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n", - filePtr->fcbEOF, trim, ret); + panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n", + filePtr->fcbEOF, trim, (long)ret); goto out; } - actualBytesAdded -= trim; } if(VTOC(vp)->c_fileid != kHFSExtentsFileID) { @@ -464,7 +538,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) lockflags = 0; if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) { - panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n", + panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n", filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim); } @@ -484,7 +558,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) (void) hfs_update(vp, TRUE); } - ret = ClearBTNodes(vp, btInfo.nodeSize, filePtr->fcbEOF - actualBytesAdded, actualBytesAdded); + ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize)); out: if (retval == 0) retval = ret; @@ -500,7 +574,7 @@ out: * Clear out (zero) new b-tree nodes on disk. */ static int -ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) +ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount) { struct hfsmount *hfsmp = VTOHFS(vp); struct buf *bp = NULL; @@ -554,19 +628,15 @@ ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) extern char hfs_attrname[]; -extern int hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey); - -int hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecnt); - /* * Create an HFS+ Attribute B-tree File. * - * A journal transaction must be already started. + * No global resources should be held. */ int -hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecnt) +hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt) { - struct vnode* vp = NULL; + struct vnode* vp = NULLVP; struct cat_desc cndesc; struct cat_attr cnattr; struct cat_fork cfork; @@ -576,34 +646,64 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecn BTreeControlBlockPtr btcb = NULL; struct buf *bp = NULL; void * buffer; + u_int8_t *bitmap; u_int16_t *index; + u_int32_t node_num, num_map_nodes; + u_int32_t bytes_per_map_record; + u_int32_t temp; u_int16_t offset; + int intrans = 0; int result; + int newvnode_flags = 0; + +again: + /* + * Serialize creation using HFS_CREATING_BTREE flag. + */ + hfs_lock_mount (hfsmp); + if (hfsmp->hfs_flags & HFS_CREATING_BTREE) { + /* Someone else beat us, wait for them to finish. */ + (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, + PDROP | PINOD, "hfs_create_attr_btree", 0); + if (hfsmp->hfs_attribute_vp) { + return (0); + } + goto again; + } + hfsmp->hfs_flags |= HFS_CREATING_BTREE; + hfs_unlock_mount (hfsmp); - printf("Creating HFS+ Attribute B-tree File (%d nodes) on %s\n", nodecnt, hfsmp->vcbVN); + /* Check if were out of usable disk space. */ + if ((hfs_freeblks(hfsmp, 1) == 0)) { + result = ENOSPC; + goto exit; + } /* * Set up Attribute B-tree vnode + * (this must be done before we start a transaction + * or take any system file locks) */ bzero(&cndesc, sizeof(cndesc)); cndesc.cd_parentcnid = kHFSRootParentID; cndesc.cd_flags |= CD_ISMETA; - cndesc.cd_nameptr = hfs_attrname; + cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname; cndesc.cd_namelen = strlen(hfs_attrname); cndesc.cd_cnid = kHFSAttributesFileID; bzero(&cnattr, sizeof(cnattr)); - cnattr.ca_nlink = 1; + cnattr.ca_linkcount = 1; cnattr.ca_mode = S_IFREG; cnattr.ca_fileid = cndesc.cd_cnid; bzero(&cfork, sizeof(cfork)); cfork.cf_clump = nodesize * nodecnt; - result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, &vp); - if (result) - return (result); - + result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, + &cfork, &vp, &newvnode_flags); + if (result) { + goto exit; + } /* * Set up Attribute B-tree control block */ @@ -627,13 +727,46 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecn /* * Allocate some space */ + if (hfs_start_transaction(hfsmp) != 0) { + result = EINVAL; + goto exit; + } + intrans = 1; + + /* Note ExtendBTreeFile will acquire the necessary system file locks. */ result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump); if (result) goto exit; btcb->totalNodes = VTOF(vp)->ff_size / nodesize; - btcb->freeNodes = btcb->totalNodes - 1; + /* + * Figure out how many map nodes we'll need. + * + * bytes_per_map_record = the number of bytes in the map record of a + * map node. Since that is the only record in the node, it is the size + * of the node minus the node descriptor at the start, and two record + * offsets at the end of the node. The "- 2" is to round the size down + * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a + * multiple of 4). + * + * The value "temp" here is the number of *bits* in the map record of + * the header node. + */ + bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2; + temp = 8 * (nodesize - sizeof(BTNodeDescriptor) + - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes + - 4 * sizeof(u_int16_t)); + if (btcb->totalNodes > temp) { + num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8); + } + else { + num_map_nodes = 0; + } + + btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes; + /* * Initialize the b-tree header on disk */ @@ -655,17 +788,19 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecn panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp)); bzero(buffer, nodesize); - index = (int16_t *)buffer; + index = (u_int16_t *)buffer; /* FILL IN THE NODE DESCRIPTOR: */ ndp = (BTNodeDescriptor *)buffer; + if (num_map_nodes != 0) + ndp->fLink = 1; ndp->kind = kBTHeaderNode; ndp->numRecords = 3; offset = sizeof(BTNodeDescriptor); index[(nodesize / 2) - 1] = offset; /* FILL IN THE HEADER RECORD: */ - bthp = (BTHeaderRec *)((UInt8 *)buffer + offset); + bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset); bthp->nodeSize = nodesize; bthp->totalNodes = btcb->totalNodes; bthp->freeNodes = btcb->freeNodes; @@ -681,8 +816,19 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecn offset += kBTreeHeaderUserBytes; index[(nodesize / 2) - 3] = offset; - /* FILL IN THE MAP RECORD (only one node in use). */ - *((u_int8_t *)buffer + offset) = 0x80; + /* Mark the header node and map nodes in use in the map record. + * + * NOTE: Assumes that the header node's map record has at least + * (num_map_nodes + 1) bits. + */ + bitmap = (u_int8_t *) buffer + offset; + temp = num_map_nodes + 1; /* +1 for the header node */ + while (temp >= 8) { + *(bitmap++) = 0xFF; + temp -= 8; + } + *bitmap = ~(0xFF >> temp); + offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); index[(nodesize / 2) - 4] = offset; @@ -695,21 +841,92 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecn if (result) goto exit; - /* Publish new btree file */ + /* Create the map nodes: node numbers 1 .. num_map_nodes */ + for (node_num=1; node_num <= num_map_nodes; ++node_num) { + bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META); + if (bp == NULL) { + result = EIO; + goto exit; + } + buffer = (void *)buf_dataptr(bp); + blkdesc.buffer = buffer; + blkdesc.blockHeader = (void *)bp; + blkdesc.blockReadFromDisk = 0; + blkdesc.isModified = 0; + + ModifyBlockStart(vp, &blkdesc); + + bzero(buffer, nodesize); + index = (u_int16_t *)buffer; + + /* Fill in the node descriptor */ + ndp = (BTNodeDescriptor *)buffer; + if (node_num != num_map_nodes) + ndp->fLink = node_num + 1; + ndp->kind = kBTMapNode; + ndp->numRecords = 1; + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = offset; + + + /* Fill in the map record's offset */ + /* Note: We assume that the map record is all zeroes */ + offset = sizeof(BTNodeDescriptor) + bytes_per_map_record; + index[(nodesize / 2) - 2] = offset; + + if (hfsmp->jnl) { + result = btree_journal_modify_block_end(hfsmp, bp); + } else { + result = VNOP_BWRITE(bp); + } + if (result) + goto exit; + } + + /* Update vp/cp for attribute btree */ + hfs_lock_mount (hfsmp); + hfsmp->hfs_attribute_cp = VTOC(vp); hfsmp->hfs_attribute_vp = vp; + hfs_unlock_mount (hfsmp); + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (intrans) { + hfs_end_transaction(hfsmp); + intrans = 0; + } + + /* Initialize the vnode for virtual attribute data file */ + result = init_attrdata_vnode(hfsmp); + if (result) { + printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result); + } + exit: - hfs_unlock(VTOC(vp)); + if (vp) { + hfs_unlock(VTOC(vp)); + } if (result) { if (btcb) { FREE (btcb, M_TEMP); } - vnode_put(vp); - // hfs_truncate(); /* XXX need to give back blocks */ + if (vp) { + vnode_put(vp); + } + /* XXX need to give back blocks ? */ + } + if (intrans) { + hfs_end_transaction(hfsmp); } - return (result); -} + /* + * All done, clear HFS_CREATING_BTREE, and wake up any sleepers. + */ + hfs_lock_mount (hfsmp); + hfsmp->hfs_flags &= ~HFS_CREATING_BTREE; + wakeup((caddr_t)&hfsmp->hfs_attribute_cp); + hfs_unlock_mount (hfsmp); + return (result); +}