/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
/* From bsd/vfs/vfs_bio.c */
extern int bdwrite_internal(struct buf *, int);
-static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
+static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount);
static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp);
+void btree_swap_node(struct buf *bp, __unused void *arg);
+
+/*
+ * Return the B-tree node size for the given vnode.
+ *
+ * A vnode is treated as a B-tree vnode only when vnode_issystem(vp) is
+ * true and its fork (VTOF(vp)) has a non-NULL fcbBTCBPtr.
+ *
+ * Returns:
+ * For a B-tree vnode, the nodeSize recorded in its B-tree control block.
+ * For any other vnode, returns 0.
+ */
+u_int16_t get_btree_nodesize(struct vnode *vp)
+{
+ BTreeControlBlockPtr btree;
+ u_int16_t node_size = 0; /* stays 0 unless a control block is found below */
+
+ if (vnode_issystem(vp)) { /* only system vnodes are examined */
+ btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr;
+ if (btree) { /* a NULL control block leaves node_size at 0 */
+ node_size = btree->nodeSize;
+ }
+ }
+
+ return node_size;
+}
-__private_extern__
OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
{
BTreeControlBlockPtr bTreePtr;
}
-__private_extern__
OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
{
OSStatus retval = E_NONE;
struct buf *bp = NULL;
+ u_int8_t allow_empty_node;
+
+ /* If the btree block is being read using a hint, it is
+ * fine for the swap code to find zeroed-out nodes.
+ */
+ if (options & kGetBlockHint) {
+ allow_empty_node = true;
+ } else {
+ allow_empty_node = false;
+ }
if (options & kGetEmptyBlock) {
daddr64_t blkno;
// XXXdbg
block->isModified = 0;
- /* Check and endian swap B-Tree node (only if it's a valid block) */
- if (!(options & kGetEmptyBlock)) {
- /* This happens when we first open the b-tree, we might not have all the node data on hand */
- if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
- (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
- (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
+ /* Check and endian swap B-Tree node (only if it's a valid block) */
+ if (!(options & kGetEmptyBlock)) {
+
+ /* This happens when we first open the b-tree, we might not have all the node data on hand */
+ if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
+ (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
+ (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
- /*
- * Don't swap the node descriptor, record offsets, or other records.
- * This record will be invalidated and re-read with the correct node
- * size once the B-tree control block is set up with the node size
- * from the header record.
- */
- retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly);
-
- } else if (block->blockReadFromDisk) {
- /*
- * The node was just read from disk, so always swap/check it.
- * This is necessary on big endian since the test below won't trigger.
- */
- retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost);
- } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
/*
- * The node was left in the cache in non-native order, so swap it.
- * This only happens on little endian, after the node is written
- * back to disk.
+ * Don't swap the node descriptor, record offsets, or other records.
+ * This record will be invalidated and re-read with the correct node
+ * size once the B-tree control block is set up with the node size
+ * from the header record.
*/
- retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost);
- }
-
- /*
- * If we got an error, then the node is only partially swapped.
- * We mark the buffer invalid so that the next attempt to get the
- * node will read it and attempt to swap again, and will notice
- * the error again. If we didn't do this, the next attempt to get
- * the node might use the partially swapped node as-is.
- */
- if (retval)
+ retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
+
+ } else {
+ /*
+ * In this case, we have enough data in-hand to do basic validation
+ * on the B-Tree node.
+ */
+ if (block->blockReadFromDisk) {
+ /*
+ * The node was just read from disk, so always swap/check it.
+ * This is necessary on big endian since the test below won't trigger.
+ */
+ retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
+ }
+ else {
+ /*
+ * Block wasn't read from disk; it was found in the cache.
+ */
+ if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
+ /*
+ * The node was left in the cache in non-native order, so swap it.
+ * This only happens on little endian, after the node is written
+ * back to disk.
+ */
+ retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
+ }
+ else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) {
+ /*
+ * The node was in-cache in native-endianness. We don't need to do
+ * anything here, because the node is ready to use. Set retval == 0.
+ */
+ retval = 0;
+ }
+ /*
+ * If the node doesn't have the value 14 (0x000e, in either byte order) in the
+ * last two bytes of the buffer, it doesn't necessarily mean that this is a bad
+ * node. Zeroed nodes that are marked as unused in the b-tree map node would be
+ * OK and not have valid content.
+ */
+ }
+ }
+
+ /*
+ * If we got an error, then the node is only partially swapped.
+ * We mark the buffer invalid so that the next attempt to get the
+ * node will read it and attempt to swap again, and will notice
+ * the error again. If we didn't do this, the next attempt to get
+ * the node might use the partially swapped node as-is.
+ */
+ if (retval)
buf_markinvalid(bp);
- }
- }
+ }
+ }
if (retval) {
if (bp)
}
-__private_extern__
void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
{
struct hfsmount *hfsmp = VTOHFS(vp);
bp = (struct buf *) blockPtr->blockHeader;
if (bp == NULL) {
- panic("ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
+ panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
return;
}
blockPtr->isModified = 1;
}
-static void
+void
btree_swap_node(struct buf *bp, __unused void *arg)
{
// struct hfsmount *hfsmp = (struct hfsmount *)arg;
block.blockReadFromDisk = (buf_fromcache(bp) == 0);
block.blockSize = buf_count(bp);
- // swap the data now that this node is ready to go to disk
- retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
+ /* Swap the data now that this node is ready to go to disk.
+ * We allow swapping of zeroed-out nodes here because we might
+ * be writing a node whose last record just got deleted.
+ */
+ retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
if (retval)
- panic("btree_swap_node: about to write corrupt node!\n");
+ panic("hfs: btree_swap_node: about to write corrupt node!\n");
}
}
-__private_extern__
OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
{
struct hfsmount *hfsmp = VTOHFS(vp);
} else {
buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
}
+
+ /* Don't let anyone else try to use this bp, it's been consumed */
+ blockPtr->blockHeader = NULL;
+
} else {
if (options & kForceWriteBlock) {
if (hfsmp->jnl) {
} else {
retval = VNOP_BWRITE(bp);
}
+
+ /* Don't let anyone else try to use this bp, it's been consumed */
+ blockPtr->blockHeader = NULL;
+
} else if (options & kMarkBlockDirty) {
struct timeval tv;
microuptime(&tv);
buf_clearflags(bp, B_LOCKED);
buf_bawrite(bp);
}
+
+ /* Don't let anyone else try to use this bp, it's been consumed */
+ blockPtr->blockHeader = NULL;
+
} else {
// check if we had previously called journal_modify_block_start()
// on this block and if so, abort it (which will call buf_brelse()).
} else {
buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
}
- };
- };
+
+ /* Don't let anyone else try to use this bp, it's been consumed */
+ blockPtr->blockHeader = NULL;
+ }
+ }
exit:
return (retval);
}
-__private_extern__
OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
{
#pragma unused (maxEOF)
trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
}
- ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0);
+ ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
// XXXdbg - panic if the file didn't get trimmed back properly
if (ret) {
// XXXdbg - this probably doesn't need to be a panic()
panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
- filePtr->fcbEOF, trim, ret);
+ filePtr->fcbEOF, trim, (long)ret);
goto out;
}
}
* Clear out (zero) new b-tree nodes on disk.
*/
static int
-ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
+ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount)
{
struct hfsmount *hfsmp = VTOHFS(vp);
struct buf *bp = NULL;
BTreeControlBlockPtr btcb = NULL;
struct buf *bp = NULL;
void * buffer;
+ u_int8_t *bitmap;
u_int16_t *index;
+ u_int32_t node_num, num_map_nodes;
+ u_int32_t bytes_per_map_record;
+ u_int32_t temp;
u_int16_t offset;
int intrans = 0;
int result;
+ int newvnode_flags = 0;
+
again:
/*
* Serialize creation using HFS_CREATING_BTREE flag.
*/
- lck_mtx_lock(&hfsmp->hfs_mutex);
+ hfs_lock_mount (hfsmp);
if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
/* Someone else beat us, wait for them to finish. */
- (void) msleep(hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
+ (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
PDROP | PINOD, "hfs_create_attr_btree", 0);
if (hfsmp->hfs_attribute_vp) {
return (0);
goto again;
}
hfsmp->hfs_flags |= HFS_CREATING_BTREE;
- lck_mtx_unlock(&hfsmp->hfs_mutex);
+ hfs_unlock_mount (hfsmp);
/* Check if were out of usable disk space. */
if ((hfs_freeblks(hfsmp, 1) == 0)) {
bzero(&cfork, sizeof(cfork));
cfork.cf_clump = nodesize * nodecnt;
- result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, &vp);
+ result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
+ &cfork, &vp, &newvnode_flags);
if (result) {
goto exit;
}
goto exit;
btcb->totalNodes = VTOF(vp)->ff_size / nodesize;
- btcb->freeNodes = btcb->totalNodes - 1;
+ /*
+ * Figure out how many map nodes we'll need.
+ *
+ * bytes_per_map_record = the number of bytes in the map record of a
+ * map node. Since that is the only record in the node, it is the size
+ * of the node minus the node descriptor at the start, and two record
+ * offsets at the end of the node. The "- 2" is to round the size down
+ * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
+ * multiple of 4).
+ *
+ * The value "temp" here is the number of *bits* in the map record of
+ * the header node.
+ */
+ bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
+ temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
+ - sizeof(BTHeaderRec)
+ - kBTreeHeaderUserBytes
+ - 4 * sizeof(u_int16_t));
+ if (btcb->totalNodes > temp) {
+ num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
+ }
+ else {
+ num_map_nodes = 0;
+ }
+
+ btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;
+
/*
* Initialize the b-tree header on disk
*/
/* FILL IN THE NODE DESCRIPTOR: */
ndp = (BTNodeDescriptor *)buffer;
+ if (num_map_nodes != 0)
+ ndp->fLink = 1;
ndp->kind = kBTHeaderNode;
ndp->numRecords = 3;
offset = sizeof(BTNodeDescriptor);
offset += kBTreeHeaderUserBytes;
index[(nodesize / 2) - 3] = offset;
- /* FILL IN THE MAP RECORD (only one node in use). */
- *((u_int8_t *)buffer + offset) = 0x80;
+ /* Mark the header node and map nodes in use in the map record.
+ *
+ * NOTE: Assumes that the header node's map record has at least
+ * (num_map_nodes + 1) bits.
+ */
+ bitmap = (u_int8_t *) buffer + offset;
+ temp = num_map_nodes + 1; /* +1 for the header node */
+ while (temp >= 8) {
+ *(bitmap++) = 0xFF;
+ temp -= 8;
+ }
+ *bitmap = ~(0xFF >> temp);
+
offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
- kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
index[(nodesize / 2) - 4] = offset;
if (result)
goto exit;
+ /* Create the map nodes: node numbers 1 .. num_map_nodes */
+ for (node_num=1; node_num <= num_map_nodes; ++node_num) {
+ bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META);
+ if (bp == NULL) {
+ result = EIO;
+ goto exit;
+ }
+ buffer = (void *)buf_dataptr(bp);
+ blkdesc.buffer = buffer;
+ blkdesc.blockHeader = (void *)bp;
+ blkdesc.blockReadFromDisk = 0;
+ blkdesc.isModified = 0;
+
+ ModifyBlockStart(vp, &blkdesc);
+
+ bzero(buffer, nodesize);
+ index = (u_int16_t *)buffer;
+
+ /* Fill in the node descriptor */
+ ndp = (BTNodeDescriptor *)buffer;
+ if (node_num != num_map_nodes)
+ ndp->fLink = node_num + 1;
+ ndp->kind = kBTMapNode;
+ ndp->numRecords = 1;
+ offset = sizeof(BTNodeDescriptor);
+ index[(nodesize / 2) - 1] = offset;
+
+
+ /* Fill in the map record's offset */
+ /* Note: We assume that the map record is all zeroes */
+ offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
+ index[(nodesize / 2) - 2] = offset;
+
+ if (hfsmp->jnl) {
+ result = btree_journal_modify_block_end(hfsmp, bp);
+ } else {
+ result = VNOP_BWRITE(bp);
+ }
+ if (result)
+ goto exit;
+ }
+
/* Update vp/cp for attribute btree */
- lck_mtx_lock(&hfsmp->hfs_mutex);
+ hfs_lock_mount (hfsmp);
hfsmp->hfs_attribute_cp = VTOC(vp);
hfsmp->hfs_attribute_vp = vp;
- lck_mtx_unlock(&hfsmp->hfs_mutex);
+ hfs_unlock_mount (hfsmp);
(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+
+ if (intrans) {
+ hfs_end_transaction(hfsmp);
+ intrans = 0;
+ }
+
+ /* Initialize the vnode for the virtual attribute data file */
+ result = init_attrdata_vnode(hfsmp);
+ if (result) {
+ printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result);
+ }
+
exit:
if (vp) {
hfs_unlock(VTOC(vp));
/*
* All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
*/
- lck_mtx_lock(&hfsmp->hfs_mutex);
+ hfs_lock_mount (hfsmp);
hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
- wakeup((caddr_t)hfsmp->hfs_attribute_cp);
- lck_mtx_unlock(&hfsmp->hfs_mutex);
+ wakeup((caddr_t)&hfsmp->hfs_attribute_cp);
+ hfs_unlock_mount (hfsmp);
return (result);
}