X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/bsd/hfs/hfs_btreeio.c?ds=inline diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 0c81c879f..fefc36ad3 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,11 +50,33 @@ /* From bsd/vfs/vfs_bio.c */ extern int bdwrite_internal(struct buf *, int); -static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount); +static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount); static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp); +void btree_swap_node(struct buf *bp, __unused void *arg); + +/* + * Return btree node size for given vnode. + * + * Returns: + * For btree vnode, returns btree node size. + * For non-btree vnodes, returns 0. + */ +u_int16_t get_btree_nodesize(struct vnode *vp) +{ + BTreeControlBlockPtr btree; + u_int16_t node_size = 0; + + if (vnode_issystem(vp)) { + btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr; + if (btree) { + node_size = btree->nodeSize; + } + } + + return node_size; +} -__private_extern__ OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount) { BTreeControlBlockPtr bTreePtr; @@ -71,11 +93,20 @@ OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemC } -__private_extern__ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block) { OSStatus retval = E_NONE; struct buf *bp = NULL; + u_int8_t allow_empty_node; + + /* If the btree block is being read using hint, it is + * fine for the swap code to find zeroed out nodes. + */ + if (options & kGetBlockHint) { + allow_empty_node = true; + } else { + allow_empty_node = false; + } if (options & kGetEmptyBlock) { daddr64_t blkno; @@ -102,47 +133,72 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt // XXXdbg block->isModified = 0; - /* Check and endian swap B-Tree node (only if it's a valid block) */ - if (!(options & kGetEmptyBlock)) { - /* This happens when we first open the b-tree, we might not have all the node data on hand */ - if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && - (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && - (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { + /* Check and endian swap B-Tree node (only if it's a valid block) */ + if (!(options & kGetEmptyBlock)) { + + /* This happens when we first open the b-tree, we might not have all the node data on hand */ + if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && + (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && + (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { - /* - * Don't swap the node descriptor, record offsets, or other records. - * This record will be invalidated and re-read with the correct node - * size once the B-tree control block is set up with the node size - * from the header record. - */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly); - - } else if (block->blockReadFromDisk) { - /* - * The node was just read from disk, so always swap/check it. - * This is necessary on big endian since the test below won't trigger. - */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); - } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { /* - * The node was left in the cache in non-native order, so swap it. - * This only happens on little endian, after the node is written - * back to disk. + * Don't swap the node descriptor, record offsets, or other records. + * This record will be invalidated and re-read with the correct node + * size once the B-tree control block is set up with the node size + * from the header record. */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); - } - - /* - * If we got an error, then the node is only partially swapped. - * We mark the buffer invalid so that the next attempt to get the - * node will read it and attempt to swap again, and will notice - * the error again. If we didn't do this, the next attempt to get - * the node might use the partially swapped node as-is. - */ - if (retval) + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node); + + } else { + /* + * In this case, we have enough data in-hand to do basic validation + * on the B-Tree node. + */ + if (block->blockReadFromDisk) { + /* + * The node was just read from disk, so always swap/check it. + * This is necessary on big endian since the test below won't trigger. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else { + /* + * Block wasn't read from disk; it was found in the cache. + */ + if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { + /* + * The node was left in the cache in non-native order, so swap it. + * This only happens on little endian, after the node is written + * back to disk. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) { + /* + * The node was in-cache in native-endianness. We don't need to do + * anything here, because the node is ready to use. Set retval == 0. + */ + retval = 0; + } + /* + * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer, + * it doesn't necessarily mean that this is a bad node. Zeroed nodes that are + * marked as unused in the b-tree map node would be OK and not have valid content. + */ + } + } + + /* + * If we got an error, then the node is only partially swapped. + * We mark the buffer invalid so that the next attempt to get the + * node will read it and attempt to swap again, and will notice + * the error again. If we didn't do this, the next attempt to get + * the node might use the partially swapped node as-is. + */ + if (retval) buf_markinvalid(bp); - } - } + } + } if (retval) { if (bp) @@ -155,7 +211,6 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt } -__private_extern__ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) { struct hfsmount *hfsmp = VTOHFS(vp); @@ -167,7 +222,7 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) bp = (struct buf *) blockPtr->blockHeader; if (bp == NULL) { - panic("ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); + panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); return; } @@ -175,7 +230,7 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) blockPtr->isModified = 1; } -static void +void btree_swap_node(struct buf *bp, __unused void *arg) { // struct hfsmount *hfsmp = (struct hfsmount *)arg; @@ -191,10 +246,13 @@ btree_swap_node(struct buf *bp, __unused void *arg) block.blockReadFromDisk = (buf_fromcache(bp) == 0); block.blockSize = buf_count(bp); - // swap the data now that this node is ready to go to disk - retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + /* Swap the data now that this node is ready to go to disk. + * We allow swapping of zeroed out nodes here because we might + * be writing node whose last record just got deleted. + */ + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true); if (retval) - panic("btree_swap_node: about to write corrupt node!\n"); + panic("hfs: btree_swap_node: about to write corrupt node!\n"); } @@ -205,7 +263,6 @@ btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) } -__private_extern__ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options) { struct hfsmount *hfsmp = VTOHFS(vp); @@ -227,6 +284,10 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else { buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } else { if (options & kForceWriteBlock) { if (hfsmp->jnl) { @@ -239,6 +300,10 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else { retval = VNOP_BWRITE(bp); } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } else if (options & kMarkBlockDirty) { struct timeval tv; microuptime(&tv); @@ -279,6 +344,10 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock buf_clearflags(bp, B_LOCKED); buf_bawrite(bp); } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } else { // check if we had previously called journal_modify_block_start() // on this block and if so, abort it (which will call buf_brelse()). @@ -295,15 +364,17 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else { buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ } - }; - }; + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } + } exit: return (retval); } -__private_extern__ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) { #pragma unused (maxEOF) @@ -439,7 +510,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize); } - ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0); + ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0); filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; // XXXdbg - panic if the file didn't get trimmed back properly @@ -451,7 +522,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) if (ret) { // XXXdbg - this probably doesn't need to be a panic() panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n", - filePtr->fcbEOF, trim, ret); + filePtr->fcbEOF, trim, (long)ret); goto out; } } @@ -503,7 +574,7 @@ out: * Clear out (zero) new b-tree nodes on disk. */ static int -ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) +ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount) { struct hfsmount *hfsmp = VTOHFS(vp); struct buf *bp = NULL; @@ -575,18 +646,24 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node BTreeControlBlockPtr btcb = NULL; struct buf *bp = NULL; void * buffer; + u_int8_t *bitmap; u_int16_t *index; + u_int32_t node_num, num_map_nodes; + u_int32_t bytes_per_map_record; + u_int32_t temp; u_int16_t offset; int intrans = 0; int result; + int newvnode_flags = 0; + again: /* * Serialize creation using HFS_CREATING_BTREE flag. */ - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); if (hfsmp->hfs_flags & HFS_CREATING_BTREE) { /* Someone else beat us, wait for them to finish. */ - (void) msleep(hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, + (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, PDROP | PINOD, "hfs_create_attr_btree", 0); if (hfsmp->hfs_attribute_vp) { return (0); @@ -594,7 +671,7 @@ again: goto again; } hfsmp->hfs_flags |= HFS_CREATING_BTREE; - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); /* Check if were out of usable disk space. */ if ((hfs_freeblks(hfsmp, 1) == 0)) { @@ -622,7 +699,8 @@ again: bzero(&cfork, sizeof(cfork)); cfork.cf_clump = nodesize * nodecnt; - result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, &vp); + result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, + &cfork, &vp, &newvnode_flags); if (result) { goto exit; } @@ -661,8 +739,34 @@ again: goto exit; btcb->totalNodes = VTOF(vp)->ff_size / nodesize; - btcb->freeNodes = btcb->totalNodes - 1; + /* + * Figure out how many map nodes we'll need. + * + * bytes_per_map_record = the number of bytes in the map record of a + * map node. Since that is the only record in the node, it is the size + * of the node minus the node descriptor at the start, and two record + * offsets at the end of the node. The "- 2" is to round the size down + * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a + * multiple of 4). + * + * The value "temp" here is the number of *bits* in the map record of + * the header node. + */ + bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2; + temp = 8 * (nodesize - sizeof(BTNodeDescriptor) + - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes + - 4 * sizeof(u_int16_t)); + if (btcb->totalNodes > temp) { + num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8); + } + else { + num_map_nodes = 0; + } + + btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes; + /* * Initialize the b-tree header on disk */ @@ -688,6 +792,8 @@ again: /* FILL IN THE NODE DESCRIPTOR: */ ndp = (BTNodeDescriptor *)buffer; + if (num_map_nodes != 0) + ndp->fLink = 1; ndp->kind = kBTHeaderNode; ndp->numRecords = 3; offset = sizeof(BTNodeDescriptor); @@ -710,8 +816,19 @@ again: offset += kBTreeHeaderUserBytes; index[(nodesize / 2) - 3] = offset; - /* FILL IN THE MAP RECORD (only one node in use). */ - *((u_int8_t *)buffer + offset) = 0x80; + /* Mark the header node and map nodes in use in the map record. + * + * NOTE: Assumes that the header node's map record has at least + * (num_map_nodes + 1) bits. + */ + bitmap = (u_int8_t *) buffer + offset; + temp = num_map_nodes + 1; /* +1 for the header node */ + while (temp >= 8) { + *(bitmap++) = 0xFF; + temp -= 8; + } + *bitmap = ~(0xFF >> temp); + offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); index[(nodesize / 2) - 4] = offset; @@ -724,13 +841,67 @@ again: if (result) goto exit; + /* Create the map nodes: node numbers 1 .. num_map_nodes */ + for (node_num=1; node_num <= num_map_nodes; ++node_num) { + bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META); + if (bp == NULL) { + result = EIO; + goto exit; + } + buffer = (void *)buf_dataptr(bp); + blkdesc.buffer = buffer; + blkdesc.blockHeader = (void *)bp; + blkdesc.blockReadFromDisk = 0; + blkdesc.isModified = 0; + + ModifyBlockStart(vp, &blkdesc); + + bzero(buffer, nodesize); + index = (u_int16_t *)buffer; + + /* Fill in the node descriptor */ + ndp = (BTNodeDescriptor *)buffer; + if (node_num != num_map_nodes) + ndp->fLink = node_num + 1; + ndp->kind = kBTMapNode; + ndp->numRecords = 1; + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = offset; + + + /* Fill in the map record's offset */ + /* Note: We assume that the map record is all zeroes */ + offset = sizeof(BTNodeDescriptor) + bytes_per_map_record; + index[(nodesize / 2) - 2] = offset; + + if (hfsmp->jnl) { + result = btree_journal_modify_block_end(hfsmp, bp); + } else { + result = VNOP_BWRITE(bp); + } + if (result) + goto exit; + } + /* Update vp/cp for attribute btree */ - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); hfsmp->hfs_attribute_cp = VTOC(vp); hfsmp->hfs_attribute_vp = vp; - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + + if (intrans) { + hfs_end_transaction(hfsmp); + intrans = 0; + } + + /* Initialize the vnode for virtual attribute data file */ + result = init_attrdata_vnode(hfsmp); + if (result) { + printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result); + } + exit: if (vp) { hfs_unlock(VTOC(vp)); @@ -751,10 +922,10 @@ exit: /* * All done, clear HFS_CREATING_BTREE, and wake up any sleepers. */ - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); hfsmp->hfs_flags &= ~HFS_CREATING_BTREE; - wakeup((caddr_t)hfsmp->hfs_attribute_cp); - lck_mtx_unlock(&hfsmp->hfs_mutex); + wakeup((caddr_t)&hfsmp->hfs_attribute_cp); + hfs_unlock_mount (hfsmp); return (result); }