/*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
File: VolumeAllocation.c
Version: HFS Plus 1.0
- Copyright: © 1996-2001 by Apple Computer, Inc., all rights reserved.
+	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.
*/
/*
Public routines:
- BlockAllocate
+ BlockAllocate / hfs_block_alloc
Allocate space on a volume. Can allocate space contiguously.
If not contiguous, then allocation may be less than what was
asked for. Returns the starting block number, and number of
- blocks. (Will only do a single extent???)
+ blocks. It will only return a single extent.
+
BlockDeallocate
Deallocate a contiguous run of allocation blocks.
-
-
-Internal routines:
+
+ BlockMarkAllocated
+ Exported wrapper to mark blocks as in-use. This will correctly determine
+ whether or not the red-black tree is enabled and call the appropriate function
+ if applicable.
BlockMarkFree
+ Exported wrapper to mark blocks as freed. This will correctly determine whether or
+ not the red-black tree is enabled and call the appropriate function if applicable.
+
+
+ ResetVCBFreeExtCache
+ Since the red-black tree obviates the need to maintain the free extent cache, we do
+ not update it if the tree is also live. As a result, if we ever need to destroy the trees
+ we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
+ bitmap scanning allocator.
+ We also reset and disable the free extent cache when volume resizing is
+ in flight.
+
+ UpdateAllocLimit
+ Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
+ allocations from occupying space in the region we are modifying during a filesystem resize.
+ At other times, it should be consistent with the total number of allocation blocks in the
+ filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
+ know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
+ number of items in the tree that we can allocate from.
+
+ ScanUnmapBlocks
+ Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it
+ tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core
+ summary table of the allocation bitmap.
+
+Internal routines:
+ BlockMarkFreeInternal
Mark a contiguous range of blocks as free. The corresponding
- bits in the volume bitmap will be cleared.
- BlockMarkAllocated
+ bits in the volume bitmap will be cleared. This will actually do the work
+ of modifying the bitmap for us.
+
+ BlockMarkAllocatedInternal
Mark a contiguous range of blocks as allocated. The cor-
responding bits in the volume bitmap are set. Also tests to see
- if any of the blocks were previously unallocated.
- FindContiguous
+ if any of the blocks were previously unallocated.
+ BlockFindContiguous
Find a contiguous range of blocks of a given size. The caller
specifies where to begin the search (by block number). The
- block number of the first block in the range is returned.
- BlockAllocateAny
+ block number of the first block in the range is returned. This is only
+ called by the bitmap scanning logic as the red-black tree should be able
+ to do this internally by searching its tree.
+ BlockFindAny
Find and allocate a contiguous range of blocks up to a given size. The
first range of contiguous free blocks found are allocated, even if there
are fewer blocks than requested (and even if a contiguous range of blocks
of the given size exists elsewhere).
- BlockAllocateContig
- Find and allocate a contiguous range of blocks of a given size. If
- a contiguous range of free blocks of the given size isn't found, then
- the allocation fails (i.e. it is "all or nothing").
-
- BlockAllocateKnown
+ BlockFindAnyBitmap
+ Finds a range of blocks per the above requirements without using the
+ Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
+ any valid range of free space needed.
+ BlockFindContig
+ Find a contiguous range of blocks of a given size.
+ If the minimum cannot be satisfied, nothing is
+ returned.
+ BlockFindKnown
Try to allocate space from known free space in the volume's
free extent cache.
-
ReadBitmapBlock
Given an allocation block number, read the bitmap block that
contains that allocation block into a caller-supplied buffer.
ReleaseBitmapBlock
Release a bitmap block back into the buffer cache.
+
+ ReadBitmapRange
+ Given an allocation block number, read a range of bitmap that
+ must begin at that allocation block into a caller supplied buffer.
+
+ ReleaseBitmapRange
+ Release and invalidate a buf_t corresponding to the bitmap
+ back into the UBC in order to prevent coherency issues.
+
+ remove_free_extent_cache
+ Remove an extent from the free extent cache. Handles overlaps
+ with multiple extents in the cache, and handles splitting an
+ extent in the cache if the extent to be removed is in the middle
+ of a cached extent.
+
+ add_free_extent_cache
+ Add an extent to the free extent cache. It will merge the
+ input extent with extents already in the cache.
+ CheckUnmappedBytes
+ Check whether or not the current transaction
+ has allocated blocks that were recently freed. This may have data safety implications.
+
+
+
+Debug/Test Routines
+ hfs_isallocated
+ Test to see if any blocks in a range are allocated. Journal or
+ allocation file lock must be held.
+
+ hfs_isallocated_scan
+ Test to see if any blocks in a range are allocated. Releases and
+ invalidates the block used when finished.
+
+Optimization Routines
+ hfs_alloc_scan_block
+ Given a starting allocation block number, figures out which physical block contains that
+ allocation block's bit, and scans it from the starting bit until either the ending bit or
+ the end of the block. Free space extents are inserted into the appropriate red-black tree.
+
*/
-#include "../../hfs_macos_defs.h"
#include <sys/types.h>
#include <sys/buf.h>
-#include <sys/systm.h>
+#if !HFS_ALLOC_TEST
+
+#include "../../hfs_macos_defs.h"
+#include <sys/systm.h>
+#include <sys/ubc.h>
+#include <kern/kalloc.h>
+/* For VM Page size */
+#include <libkern/libkern.h>
+#include <vfs/vfs_journal.h>
#include "../../hfs.h"
+#include "../../hfs_endian.h"
+#include "../headers/FileMgrInternal.h"
+
+#endif // !HFS_ALLOC_TEST
+
+#include <sys/sysctl.h>
+#include <sys/disk.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+
#include "../../hfs_dbg.h"
#include "../../hfs_format.h"
-#include "../../hfs_endian.h"
+#include "../../hfs_kdebug.h"
+#include "../../rangelist.h"
+#include "../../hfs_extents.h"
-#include "../headers/FileMgrInternal.h"
+/* Headers for unmap-on-mount support */
+#include <sys/disk.h>
+#ifndef CONFIG_HFS_TRIM
+#define CONFIG_HFS_TRIM 0
+#endif
+
+/*
+ * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
+ * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
+ * disabled by default because there can be a lot of these events,
+ * and we don't want to overwhelm the kernel debug buffer. If you
+ * want to watch these events in particular, just set the sysctl.)
+ */
+static int hfs_kdebug_allocation = 0;
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
+SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
+SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
+enum {
+ /*
+ * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
+	 * BlockDeallocate, including the internal BlockFindXxx
+ * routines so we can see how an allocation was satisfied.
+ *
+ * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
+ * free extent cache.
+ *
+ * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
+ *
+ * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
+ * or clearing bits, scanning the bitmap).
+ */
+ HFSDBG_ALLOC_ENABLED = 1,
+ HFSDBG_EXT_CACHE_ENABLED = 2,
+ HFSDBG_UNMAP_ENABLED = 4,
+ HFSDBG_BITMAP_ENABLED = 8
+};
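+
+/*
+ * For example (illustrative shell usage, not part of this file): to watch
+ * allocation events and trim-list events together, set the sysctl to the
+ * bitwise OR of the flag values above:
+ *
+ *     sysctl -w vfs.generic.hfs.kdebug.allocation=5    # ALLOC (1) | UNMAP (4)
+ *
+ * The logging sites below test individual bits, e.g.
+ * (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED), so any combination works.
+ */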
enum {
kBytesPerWord = 4,
#define kHighBitInWordMask 0x80000000ul
#define kAllBitsSetInWord 0xFFFFFFFFul
+#define HFS_MIN_SUMMARY_BLOCKSIZE 4096
+
+#define ALLOC_DEBUG 0
static OSErr ReadBitmapBlock(
- ExtendedVCB *vcb,
- UInt32 bit,
- UInt32 **buffer,
- UInt32 *blockRef);
+ ExtendedVCB *vcb,
+ u_int32_t bit,
+ u_int32_t **buffer,
+ uintptr_t *blockRef,
+ hfs_block_alloc_flags_t flags);
static OSErr ReleaseBitmapBlock(
- ExtendedVCB *vcb,
- UInt32 blockRef,
- Boolean dirty);
-
-static OSErr BlockAllocateAny(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- UInt32 endingBlock,
- UInt32 maxBlocks,
- Boolean useMetaZone,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks);
-
-static OSErr BlockAllocateContig(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- UInt32 minBlocks,
- UInt32 maxBlocks,
- Boolean useMetaZone,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks);
+ ExtendedVCB *vcb,
+ uintptr_t blockRef,
+ Boolean dirty);
+
+static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
+ HFSPlusExtentDescriptor *extent,
+ hfs_block_alloc_flags_t flags,
+ hfs_alloc_extra_args_t *ap);
+
+static OSErr BlockFindAny(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t endingBlock,
+ u_int32_t maxBlocks,
+ hfs_block_alloc_flags_t flags,
+ Boolean trustSummary,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks);
+
+static OSErr BlockFindAnyBitmap(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t endingBlock,
+ u_int32_t maxBlocks,
+ hfs_block_alloc_flags_t flags,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks);
+
+static OSErr BlockFindContig(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t minBlocks,
+ u_int32_t maxBlocks,
+ hfs_block_alloc_flags_t flags,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks);
static OSErr BlockFindContiguous(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- UInt32 endingBlock,
- UInt32 minBlocks,
- UInt32 maxBlocks,
- Boolean useMetaZone,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks);
-
-static OSErr BlockAllocateKnown(
- ExtendedVCB *vcb,
- UInt32 maxBlocks,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks);
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t endingBlock,
+ u_int32_t minBlocks,
+ u_int32_t maxBlocks,
+ Boolean useMetaZone,
+ Boolean trustSummary,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks,
+ hfs_block_alloc_flags_t flags);
+
+static OSErr BlockFindKnown(
+ ExtendedVCB *vcb,
+ u_int32_t maxBlocks,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks);
+
+static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
+ HFSPlusExtentDescriptor *extent,
+ uint32_t max_blocks,
+ hfs_block_alloc_flags_t flags);
+
+static OSErr BlockMarkAllocatedInternal (
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t numBlocks,
+ hfs_block_alloc_flags_t flags);
+
+static OSErr BlockMarkFreeInternal(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t numBlocks,
+ Boolean do_validate);
+
+
+static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
+ uint32_t **buffer, struct buf **blockRef);
+
+static OSErr ReleaseScanBitmapRange( struct buf *bp );
+
+static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
+ u_int32_t numBlocks, struct jnl_trim_list *list);
+
+static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
+
+static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
+ u_int32_t startbit,
+ u_int32_t *bitToScan,
+ struct jnl_trim_list *list);
+
+static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
+static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
+
+/* Bitmap Re-use Detection */
+static inline int extents_overlap (uint32_t start1, uint32_t len1,
+ uint32_t start2, uint32_t len2) {
+ return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) );
+}
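+
+/*
+ * A quick worked example: extents (start=10, len=5) and (start=14, len=2)
+ * overlap, since neither 10+5 <= 14 nor 14+2 <= 10 holds. Extents (10, 5)
+ * and (15, 2) do not overlap: the ranges are half-open, so [10,15) and
+ * [15,17) merely touch.
+ */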
+
+
+int hfs_isallocated_scan (struct hfsmount *hfsmp,
+ u_int32_t startingBlock,
+ u_int32_t *bp_buf);
+
+/* Summary Table Functions */
+static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
+static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
+static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
+static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
+static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
+static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
+static int hfs_rebuild_summary (struct hfsmount *hfsmp);
+
+#if 0
+static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
+#endif
+
+/* Used in external mount code to initialize the summary table */
+int hfs_init_summary (struct hfsmount *hfsmp);
+
+#if ALLOC_DEBUG
+void hfs_validate_summary (struct hfsmount *hfsmp);
+#endif
+
+
+/* Functions for manipulating free extent cache */
+static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
+static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
+
+static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
+
+/* Functions for getting free extents */
+
+typedef struct bitmap_context {
+ void *bitmap; // current bitmap chunk
+ uint32_t run_offset; // offset (in bits) from start of bitmap to start of current run
+ uint32_t chunk_current; // next bit to scan in the chunk
+ uint32_t chunk_end; // number of valid bits in this chunk
+ struct hfsmount *hfsmp;
+ struct buf *bp;
+ uint32_t last_free_summary_bit; // last marked free summary bit
+ int lockflags;
+ uint64_t lock_start;
+} bitmap_context_t;
+
+static errno_t get_more_bits(bitmap_context_t *bitmap_ctx);
+static int bit_count_set(void *bitmap, int start, int end);
+static int bit_count_clr(void *bitmap, int start, int end);
+static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int, int), uint32_t *bit_count);
+static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count);
+static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count);
+static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set);
+static int clzll(uint64_t x);
+#if ALLOC_DEBUG
/*
-;________________________________________________________________________________
-;
-; Routine: BlkAlloc
-;
-; Function: Allocate space on a volume. If contiguous allocation is requested,
-; at least the requested number of bytes will be allocated or an
-; error will be returned. If contiguous allocation is not forced,
-; the space will be allocated at the first free fragment following
-; the requested starting allocation block. If there is not enough
-; room there, a block of less than the requested size will be
-; allocated.
-;
-; If the requested starting block is 0 (for new file allocations),
-; the volume's allocation block pointer will be used as a starting
-; point.
-;
-; Input Arguments:
-; vcb - Pointer to ExtendedVCB for the volume to allocate space on
-; fcb - Pointer to FCB for the file for which storage is being allocated
-; startingBlock - Preferred starting allocation block, 0 = no preference
-; forceContiguous - Force contiguous flag - if bit 0 set (NE), allocation is contiguous
-; or an error is returned
-; useMetaZone -
-; minBlocks - Number of blocks requested. If the allocation is non-contiguous,
-; less than this may actually be allocated
-; maxBlocks - The maximum number of blocks to allocate. If there is additional free
-; space after bytesRequested, then up to maxBlocks bytes should really
-; be allocated. (Used by ExtendFileC to round up allocations to a multiple
-; of the file's clump size.)
-;
-; Output:
-; (result) - Error code, zero for successful allocation
-; *startBlock - Actual starting allocation block
-; *actualBlocks - Actual number of allocation blocks allocated
-;
-; Side effects:
-; The volume bitmap is read and updated; the volume bitmap cache may be changed.
-;________________________________________________________________________________
-*/
+ * Validation Routine to verify that the TRIM list maintained by the journal
+ * is in good shape relative to what we think the bitmap should have. We should
+ * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
+ * we panic.
+ */
+int trim_validate_bitmap (struct hfsmount *hfsmp);
+int trim_validate_bitmap (struct hfsmount *hfsmp) {
+ u_int64_t blockno_offset;
+ u_int64_t numblocks;
+ int i;
+ int count;
+ u_int32_t startblk;
+ u_int32_t blks;
+ int err = 0;
+ uint32_t alloccount = 0;
-__private_extern__
-OSErr BlockAllocate (
- ExtendedVCB *vcb, /* which volume to allocate space on */
- UInt32 startingBlock, /* preferred starting block, or 0 for no preference */
- UInt32 minBlocks, /* desired number of blocks to allocate */
- UInt32 maxBlocks, /* maximum number of blocks to allocate */
- Boolean forceContiguous, /* non-zero to force contiguous allocation and to force */
- /* minBlocks bytes to actually be allocated */
-
- Boolean useMetaZone,
- UInt32 *actualStartBlock, /* actual first block of allocation */
- UInt32 *actualNumBlocks) /* number of blocks actually allocated; if forceContiguous */
- /* was zero, then this may represent fewer than minBlocks */
+ if (hfsmp->jnl) {
+ struct journal *jnl = (struct journal*)hfsmp->jnl;
+ if (jnl->active_tr) {
+ struct jnl_trim_list *trim = &(jnl->active_tr->trim);
+ count = trim->extent_count;
+ for (i = 0; i < count; i++) {
+ blockno_offset = trim->extents[i].offset;
+ blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
+ blockno_offset = blockno_offset / hfsmp->blockSize;
+ numblocks = trim->extents[i].length / hfsmp->blockSize;
+
+ startblk = (u_int32_t)blockno_offset;
+ blks = (u_int32_t) numblocks;
+ err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
+
+ if (err == 0 && alloccount != 0) {
+ panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+#endif
+
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: hfs_unmap_free_extent
+ ;
+ ; Function: Make note of a range of allocation blocks that should be
+ ; unmapped (trimmed). That is, the given range of blocks no
+ ; longer have useful content, and the device can unmap the
+ ; previous contents. For example, a solid state disk may reuse
+ ; the underlying storage for other blocks.
+ ;
+ ; This routine is only supported for journaled volumes. The extent
+ ; being freed is passed to the journal code, and the extent will
+ ; be unmapped after the current transaction is written to disk.
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ; startingBlock - The first allocation block of the extent being freed.
+ ; numBlocks - The number of allocation blocks of the extent being freed.
+ ;________________________________________________________________________________
+ */
+static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
- UInt32 freeBlocks;
- OSErr err;
- Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
+ u_int64_t offset;
+ u_int64_t length;
+ u_int64_t device_sz;
+ int err = 0;
- //
- // Initialize outputs in case we get an error
- //
- *actualStartBlock = 0;
- *actualNumBlocks = 0;
- freeBlocks = hfs_freeblks(VCBTOHFS(vcb), 0);
-
- //
- // If the disk is already full, don't bother.
- //
- if (freeBlocks == 0) {
- err = dskFulErr;
- goto Exit;
- }
- if (forceContiguous && freeBlocks < minBlocks) {
- err = dskFulErr;
- goto Exit;
- }
- /*
- * Clip if necessary so we don't over-subscribe the free blocks.
- */
- if (minBlocks > freeBlocks) {
- minBlocks = freeBlocks;
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
+
+ if (ALLOC_DEBUG) {
+ if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+ panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
+ }
}
- if (maxBlocks > freeBlocks) {
- maxBlocks = freeBlocks;
+
+ if (hfsmp->jnl != NULL) {
+ device_sz = hfsmp->hfs_logical_bytes;
+ offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+ length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+ /* Validate that the trim is in a valid range of bytes */
+ if ((offset >= device_sz) || ((offset + length) > device_sz)) {
+ printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
+ err = EINVAL;
+ }
+
+ if (err == 0) {
+ err = journal_trim_add_extent(hfsmp->jnl, offset, length);
+ if (err) {
+ printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
+ }
+ }
}
- //
- // If caller didn't specify a starting block number, then use the volume's
- // next block to allocate from.
- //
- if (startingBlock == 0) {
- HFS_MOUNT_LOCK(vcb, TRUE);
- startingBlock = vcb->nextAllocation;
- HFS_MOUNT_UNLOCK(vcb, TRUE);
- updateAllocPtr = true;
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
+}
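+
+/*
+ * Worked example of the offset math above (hypothetical values): with
+ * blockSize = 4096 and hfsPlusIOPosOffset = 1024, freeing 8 blocks starting
+ * at allocation block 100 produces offset = 100 * 4096 + 1024 = 410624 bytes
+ * and length = 8 * 4096 = 32768 bytes, which is the extent handed to
+ * journal_trim_add_extent().
+ */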
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: hfs_track_unmap_blocks
+ ;
+ ; Function: Make note of a range of allocation blocks that should be
+ ; unmapped (trimmed). That is, the given range of blocks no
+ ; longer have useful content, and the device can unmap the
+ ; previous contents. For example, a solid state disk may reuse
+ ; the underlying storage for other blocks.
+ ;
+ ; This routine is only supported for journaled volumes.
+ ;
+ ; *****NOTE*****:
+ ; This function should *NOT* be used when the volume is fully
+ ; mounted. This function is intended to support a bitmap iteration
+ ; at mount time to fully inform the SSD driver of the state of all blocks
+ ; at mount time, and assumes that there is no allocation/deallocation
+ ;    interference during its iteration.
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ; offset - The first allocation block of the extent being freed.
+ ; numBlocks - The number of allocation blocks of the extent being freed.
+ ; list - The list of currently tracked trim ranges.
+ ;________________________________________________________________________________
+ */
+static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
+ u_int32_t numBlocks, struct jnl_trim_list *list) {
+
+ u_int64_t offset;
+ u_int64_t length;
+ int error = 0;
+
+ if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) {
+ int extent_no = list->extent_count;
+ offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+ length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+
+ list->extents[extent_no].offset = offset;
+ list->extents[extent_no].length = length;
+ list->extent_count++;
+ if (list->extent_count == list->allocated_count) {
+ error = hfs_issue_unmap (hfsmp, list);
+ }
}
- if (startingBlock >= vcb->totalBlocks) {
- startingBlock = 0; /* overflow so start at beginning */
+
+ return error;
+}
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: hfs_issue_unmap
+ ;
+ ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ; list - The list of currently tracked trim ranges.
+ ;________________________________________________________________________________
+ */
+
+static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
+{
+ dk_unmap_t unmap;
+ int error = 0;
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
}
- //
- // If the request must be contiguous, then find a sequence of free blocks
- // that is long enough. Otherwise, find the first free block.
- //
- if (forceContiguous) {
- err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
- useMetaZone, actualStartBlock, actualNumBlocks);
- /*
- * If we allocated from a new position then
- * also update the roving allocatior.
- */
- if ((err == noErr) &&
- (*actualStartBlock > startingBlock) &&
- ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
- (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
- HFS_MOUNT_LOCK(vcb, TRUE);
- vcb->nextAllocation = *actualStartBlock;
- HFS_MOUNT_UNLOCK(vcb, TRUE);
+ if (list->extent_count > 0 && list->extents != NULL) {
+ bzero(&unmap, sizeof(unmap));
+ unmap.extents = list->extents;
+ unmap.extentsCount = list->extent_count;
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
}
- } else {
- /*
- * Scan the bitmap once, gather the N largest free extents, then
- * allocate from these largest extents. Repeat as needed until
- * we get all the space we needed. We could probably build up
- * that list when the higher level caller tried (and failed) a
- * contiguous allocation first.
+
+#if CONFIG_PROTECT
+ /*
+ * If we have not yet completed the first scan through the bitmap, then
+ * optionally inform the block driver below us that this is an initialization
+ * TRIM scan, if it can deal with this information.
*/
- err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
- if (err == dskFulErr)
- err = BlockAllocateAny(vcb, startingBlock, vcb->totalBlocks,
- maxBlocks, useMetaZone, actualStartBlock,
- actualNumBlocks);
- if (err == dskFulErr)
- err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
- useMetaZone, actualStartBlock,
- actualNumBlocks);
+ if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
+ unmap.options |= _DK_UNMAP_INITIALIZE;
+ }
+#endif
+ /* Issue a TRIM and flush them out */
+ error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
+
+ bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
+ bzero (&unmap, sizeof(unmap));
+ list->extent_count = 0;
}
-Exit:
- // if we actually allocated something then go update the
- // various bits of state that we maintain regardless of
- // whether there was an error (i.e. partial allocations
- // still need to update things like the free block count).
- //
- if (*actualNumBlocks != 0) {
- //
- // If we used the volume's roving allocation pointer, then we need to update it.
- // Adding in the length of the current allocation might reduce the next allocate
- // call by avoiding a re-scan of the already allocated space. However, the clump
- // just allocated can quite conceivably end up being truncated or released when
- // the file is closed or its EOF changed. Leaving the allocation pointer at the
- // start of the last allocation will avoid unnecessary fragmentation in this case.
- //
- HFS_MOUNT_LOCK(vcb, TRUE);
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
+ }
- if (updateAllocPtr &&
- ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
- (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
- vcb->nextAllocation = *actualStartBlock;
- }
- //
- // Update the number of free blocks on the volume
- //
- vcb->freeBlocks -= *actualNumBlocks;
- MarkVCBDirty(vcb);
- HFS_MOUNT_UNLOCK(vcb, TRUE);
+ return error;
+}
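+
+/*
+ * Typical usage pattern (a sketch; ScanUnmapBlocks below does exactly this):
+ * accumulate extents with hfs_track_unmap_blocks(), which flushes the list
+ * automatically when it fills up, then issue whatever remains:
+ *
+ *     struct jnl_trim_list trimlist;
+ *     bzero(&trimlist, sizeof(trimlist));
+ *     // ... allocate trimlist.extents and set trimlist.allocated_count ...
+ *     error = hfs_track_unmap_blocks(hfsmp, start, numBlocks, &trimlist);
+ *     // ... repeat for each free range found ...
+ *     error = hfs_issue_unmap(hfsmp, &trimlist);
+ */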
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: hfs_unmap_alloc_extent
+ ;
+ ; Function: Make note of a range of allocation blocks, some of
+ ; which may have previously been passed to hfs_unmap_free_extent,
+ ; is now in use on the volume. The given blocks will be removed
+ ; from any pending DKIOCUNMAP.
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ; startingBlock - The first allocation block of the extent being allocated.
+ ; numBlocks - The number of allocation blocks being allocated.
+ ;________________________________________________________________________________
+ */
+
+static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+ u_int64_t offset;
+ u_int64_t length;
+ int err = 0;
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
- hfs_generate_volume_notifications(VCBTOHFS(vcb));
+ if (hfsmp->jnl != NULL) {
+ offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+ length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+ err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
+ if (err) {
+ printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
+ }
}
-
- return err;
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
}
/*
;________________________________________________________________________________
;
-; Routine: BlkDealloc
+; Routine: hfs_trim_callback
;
-; Function: Update the bitmap to deallocate a run of disk allocation blocks
+; Function: This function is called when a transaction that freed extents
+; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
+; written to the on-disk journal. This routine will add those
+; extents to the free extent cache so that they can be reused.
;
-; Input Arguments:
-; vcb - Pointer to ExtendedVCB for the volume to free space on
-; firstBlock - First allocation block to be freed
-; numBlocks - Number of allocation blocks to free up (must be > 0!)
+; CAUTION: This routine is called while the journal's trim lock
+; is held shared, so that no other thread can reuse any portion
+; of those extents. We must be very careful about which locks
+; we take from within this callback, to avoid deadlock. The
+; call to add_free_extent_cache will end up taking the cache's
+; lock (just long enough to add these extents to the cache).
;
-; Output:
-; (result) - Result code
+; CAUTION: If the journal becomes invalid (eg., due to an I/O
+; error when trying to write to the journal), this callback
+; will stop getting called, even if extents got freed before
+; the journal became invalid!
;
-; Side effects:
-; The volume bitmap is read and updated; the volume bitmap cache may be changed.
+; Input Arguments:
+; arg - The hfsmount of the volume containing the extents.
+; extent_count - The number of extents freed in the transaction.
+; extents - An array of extents (byte ranges) that were freed.
;________________________________________________________________________________
*/
-__private_extern__
-OSErr BlockDeallocate (
- ExtendedVCB *vcb, // Which volume to deallocate space on
- UInt32 firstBlock, // First block in range to deallocate
- UInt32 numBlocks) // Number of contiguous blocks to deallocate
+__private_extern__ void
+hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
{
- OSErr err;
-
- //
- // If no blocks to deallocate, then exit early
- //
- if (numBlocks == 0) {
- err = noErr;
- goto Exit;
+ uint32_t i;
+ uint32_t startBlock, numBlocks;
+ struct hfsmount *hfsmp = arg;
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
+
+ for (i=0; i<extent_count; ++i) {
+ /* Convert the byte range in *extents back to a range of allocation blocks. */
+ startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
+ numBlocks = extents[i].length / hfsmp->blockSize;
+ (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
}
- //
- // Call internal routine to free the sequence of blocks
- //
- err = BlockMarkFree(vcb, firstBlock, numBlocks);
- if (err)
- goto Exit;
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
- //
- // Update the volume's free block count, and mark the VCB as dirty.
- //
- HFS_MOUNT_LOCK(vcb, TRUE);
- vcb->freeBlocks += numBlocks;
- if (vcb->nextAllocation == (firstBlock + numBlocks))
- vcb->nextAllocation -= numBlocks;
- MarkVCBDirty(vcb);
- HFS_MOUNT_UNLOCK(vcb, TRUE);
- hfs_generate_volume_notifications(VCBTOHFS(vcb));
-Exit:
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: CheckUnmappedBytes
+ ;
+ ; Function: From the specified inputs, determine if the extent in question overlaps
+ ; space that was recently freed, where the recently freed space may still be
+ ; lingering in an uncommitted journal transaction. This may have data safety
+ ; implications. The intended use is to decide whether or not to force a journal flush
+ ; before allowing file data I/O to be issued. If we did not do this
+ ; then it would be possible to issue the file I/O ahead of the
+ ; journal, resulting in data being overwritten if the transaction either
+ ; is not committed or cannot be replayed.
+ ;
+ ; NOTE: This function assumes that the journal and catalog/extent locks are held.
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ;  blockno        - start of the extent in question (in allocation blocks)
+ ;  numblocks      - number of blocks in the extent.
+ ;  recently_freed - output pointer indicating whether or not the blocks were freed recently
+ ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks)
+ ;
+ ; Output:
+ ;
+ ; Returns 0 if we could determine extent validity for this (or a previous transaction)
+ ; Returns errno if there was an error
+ ;
+ ;  If 0 is returned, then *recently_freed will contain a boolean indicating
+ ;  whether the extent was recently freed.
+ ;________________________________________________________________________________
+ */
+
+u_int32_t
+CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
+ uint64_t device_offset;
+ uint64_t numbytes;
+ uint32_t err = 0;
+ uint64_t lba_overlap_end;
+ if (hfsmp->jnl != NULL) {
+ /*
+ * Convert the allocation block # and the number of blocks into device-relative
+ * offsets so that they can be compared using the TRIM list.
+ */
+ uint64_t device_sz = hfsmp->hfs_logical_bytes;
+ device_offset = blockno * ((uint64_t)hfsmp->blockSize);
+ device_offset += hfsmp->hfsPlusIOPosOffset;
+ numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
+
+ /*
+ * Since we check that the device_offset isn't too large, it's safe to subtract it
+ * from the size in the second check.
+ */
+ if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
+ return EINVAL;
+ }
+
+ /* Ask the journal if this extent overlaps with any pending TRIMs */
+ if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
+ *recently_freed = 1;
+
+ /* Convert lba_overlap_end back into allocation blocks */
+ uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
+ end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
+ *overlap_end = (uint32_t) end_offset;
+ }
+ else {
+ *recently_freed = 0;
+ }
+ err = 0;
+ }
+ else {
+ /* There may not be a journal. In that case, always return success. */
+ *recently_freed = 0;
+ }
return err;
+
}
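+
+/*
+ * Illustrative caller sketch (not from the original source): before issuing
+ * raw file I/O against an extent, a caller can ask whether the blocks overlap
+ * a pending TRIM and, if so, force a journal flush first:
+ *
+ *     int recently_freed = 0;
+ *     uint32_t overlap_end = 0;
+ *     if (CheckUnmappedBytes(hfsmp, blockno, numblocks,
+ *                            &recently_freed, &overlap_end) == 0 &&
+ *         recently_freed) {
+ *         // flush the journal before letting the file I/O go out
+ *     }
+ */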
-UInt8 freebitcount[16] = {
- 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
- 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
-};
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: ScanUnmapBlocks
+ ;
+ ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
+ ; device as needed so that the underlying disk device is as
+ ; up-to-date as possible with which blocks are unmapped.
+ ; Additionally build up the summary table as needed.
+ ;
+ ; This function reads the bitmap in large block sizes
+ ; (up to 1MB), unlike the runtime which reads the bitmap
+ ; in 4K block sizes. So if this function is being called
+ ; after the volume is mounted and actively modified, the
+ ; caller needs to invalidate all of the existing buffers
+ ; associated with the bitmap vnode before calling this
+ ; function. If the buffers are not invalidated, it can
+ ; cause buf_t collisions and potential data corruption.
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ;________________________________________________________________________________
+ */
__private_extern__
-UInt32
-MetaZoneFreeBlocks(ExtendedVCB *vcb)
+u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
{
- UInt32 freeblocks;
- UInt32 *currCache;
- UInt32 blockRef;
- UInt32 bit;
- UInt32 lastbit;
- int bytesleft;
- int bytesperblock;
- UInt8 byte;
- UInt8 *buffer;
+ u_int32_t blocks_scanned = 0;
+ int error = 0;
+ struct jnl_trim_list trimlist;
- blockRef = 0;
- bytesleft = freeblocks = 0;
- buffer = NULL;
- bit = VCBTOHFS(vcb)->hfs_metazone_start;
- if (bit == 1)
- bit = 0;
-
- lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
- bytesperblock = vcb->vcbVBMIOSize;
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
+ }
/*
- * Count all the bits from bit to lastbit.
+	 * struct jnl_trim_list {
+	 *     uint32_t    allocated_count;
+	 *     uint32_t    extent_count;
+	 *     dk_extent_t *extents;
+	 * };
*/
- while (bit < lastbit) {
- /*
- * Get next bitmap block.
- */
- if (bytesleft == 0) {
- if (blockRef) {
- (void) ReleaseBitmapBlock(vcb, blockRef, false);
- blockRef = 0;
- }
- if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
- return (0);
- }
- buffer = (UInt8 *)currCache;
- bytesleft = bytesperblock;
+ bzero (&trimlist, sizeof(trimlist));
+
+ /*
+ * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
+ * which is now enabled for most architectures. Instead, any trim related
+ * work should be tied to whether the underlying storage media supports
+ * UNMAP, as any solid state device would on desktop or embedded.
+ *
+ * We do this because we may want to scan the full bitmap on desktop
+ * for spinning media for the purposes of building up the
+ * summary table.
+ *
+ * We also avoid sending TRIMs down to the underlying media if the mount is read-only.
+ */
+
+ if ((hfsmp->hfs_flags & HFS_UNMAP) &&
+ ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
+ /* If the underlying device supports unmap and the mount is read-write, initialize */
+ int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
+ void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
+ if (extents == NULL) {
+ return ENOMEM;
}
- byte = *buffer++;
- freeblocks += freebitcount[byte & 0x0F];
- freeblocks += freebitcount[(byte >> 4) & 0x0F];
- bit += kBitsPerByte;
- --bytesleft;
+ trimlist.extents = (dk_extent_t*)extents;
+ trimlist.allocated_count = alloc_count;
+ trimlist.extent_count = 0;
}
- if (blockRef)
- (void) ReleaseBitmapBlock(vcb, blockRef, false);
- return (freeblocks);
-}
+ while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
+ error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
-/*
- * Obtain the next allocation block (bit) that's
- * outside the metadata allocation zone.
- */
-static UInt32 NextBitmapBlock(
- ExtendedVCB *vcb,
- UInt32 bit)
-{
- struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ if (error) {
+ printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
+ break;
+ }
+ }
- if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
- return (bit);
- /*
- * Skip over metadata allocation zone.
+ if ((hfsmp->hfs_flags & HFS_UNMAP) &&
+ ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
+ if (error == 0) {
+ hfs_issue_unmap(hfsmp, &trimlist);
+ }
+ if (trimlist.extents) {
+ kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
+ }
+ }
+
+ /*
+ * This is in an #if block because hfs_validate_summary prototype and function body
+ * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
*/
- if ((bit >= hfsmp->hfs_metazone_start) &&
- (bit <= hfsmp->hfs_metazone_end)) {
- bit = hfsmp->hfs_metazone_end + 1;
+#if ALLOC_DEBUG
+ sanity_check_free_ext(hfsmp, 1);
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ /* Validate the summary table too! */
+ hfs_validate_summary(hfsmp);
+ printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
}
- return (bit);
-}
+#endif
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
+ }
-/*
-;_______________________________________________________________________
-;
-; Routine: ReadBitmapBlock
-;
-; Function: Read in a bitmap block corresponding to a given allocation
-; block (bit). Return a pointer to the bitmap block.
-;
-; Inputs:
-; vcb -- Pointer to ExtendedVCB
-; bit -- Allocation block whose bitmap block is desired
-;
-; Outputs:
-; buffer -- Pointer to bitmap block corresonding to "block"
-; blockRef
-;_______________________________________________________________________
-*/
-static OSErr ReadBitmapBlock(
- ExtendedVCB *vcb,
- UInt32 bit,
- UInt32 **buffer,
- UInt32 *blockRef)
+ return error;
+}
+
+static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start,
+ uint32_t count, int list,
+ struct rl_entry **reservation)
{
- OSErr err;
- struct buf *bp = NULL;
- struct vnode *vp = NULL;
- daddr64_t block;
- UInt32 blockSize;
+ struct rl_entry *range, *next_range;
+
+ if (list == HFS_TENTATIVE_BLOCKS) {
+ int nranges = 0;
+ // Don't allow more than 4 tentative reservations
+ TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
+ rl_link, next_range) {
+ if (++nranges > 3)
+ hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+ }
+ }
- /*
- * volume bitmap blocks are protected by the allocation file lock
- */
- REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+ MALLOC(range, struct rl_entry *, sizeof(*range), M_TEMP, M_WAITOK);
+ range->rl_start = start;
+ range->rl_end = start + count - 1;
+ TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
+ *reservation = range;
+}
- blockSize = (UInt32)vcb->vcbVBMIOSize;
- block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
+static void hfs_release_reserved(hfsmount_t *hfsmp,
+ struct rl_entry *range,
+ int list)
+{
+ if (range->rl_start == -1)
+ return;
+
+ TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
+
+ if (rl_len(range) > 0) {
+ if (list == HFS_TENTATIVE_BLOCKS)
+ hfsmp->tentativeBlocks -= rl_len(range);
+ else {
+ /*
+ * We don't need to unmap tentative blocks because we won't have
+ * written to them, but we might have written to reserved blocks.
+ * Nothing can refer to those blocks so this doesn't have to be
+ * via the journal. If this proves to be too expensive, we could
+ * consider not sending down the unmap or we could require this
+ * to always be called within a transaction and then we can use
+ * the journal.
+ */
+ dk_extent_t extent = {
+ .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize)
+ + hfsmp->hfsPlusIOPosOffset),
+ .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize)
+ };
+ dk_unmap_t unmap = {
+ .extents = &extent,
+ .extentsCount = 1,
+ };
+ VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap,
+ 0, vfs_context_kernel());
+ assert(hfsmp->lockedBlocks >= rl_len(range));
+ hfsmp->lockedBlocks -= rl_len(range);
+ }
+ hfs_release_summary(hfsmp, range->rl_start, rl_len(range));
+ add_free_extent_cache(hfsmp, range->rl_start, rl_len(range));
+ }
- if (vcb->vcbSigWord == kHFSPlusSigWord) {
- vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
+ range->rl_start = -1;
+ range->rl_end = -2;
+}
- } else /* hfs */ {
- vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */
- block += vcb->vcbVBMSt; /* map to physical block */
+static void hfs_free_locked_internal(hfsmount_t *hfsmp,
+ struct rl_entry **reservation,
+ int list)
+{
+ if (*reservation) {
+ hfs_release_reserved(hfsmp, *reservation, list);
+ FREE(*reservation, M_TEMP);
+ *reservation = NULL;
}
+}
- err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
+void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
+{
+ hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
+}
- if (bp) {
- if (err) {
- buf_brelse(bp);
- *blockRef = NULL;
- *buffer = NULL;
- } else {
- *blockRef = (UInt32)bp;
- *buffer = (UInt32 *)buf_dataptr(bp);
- }
- }
+void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
+{
+ hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
+}
+
+OSErr BlockAllocate (
+ hfsmount_t *hfsmp, /* which volume to allocate space on */
+ u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
+ u_int32_t minBlocks, /* desired number of blocks to allocate */
+ u_int32_t maxBlocks, /* maximum number of blocks to allocate */
+ hfs_block_alloc_flags_t flags, /* option flags */
+ u_int32_t *actualStartBlock, /* actual first block of allocation */
+ u_int32_t *actualNumBlocks)
+{
+ hfs_alloc_extra_args_t extra_args = {
+ .max_blocks = maxBlocks
+ };
+
+ HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
+
+ OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
+
+ *actualStartBlock = extent.startBlock;
+ *actualNumBlocks = extent.blockCount;
return err;
}
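+
+/*
+ * Example call (a sketch with hypothetical values): request at least 8 and at
+ * most 64 contiguous blocks, letting the allocator choose the starting point:
+ *
+ *     u_int32_t startBlock, numBlocks;
+ *     OSErr err = BlockAllocate(hfsmp, 0, 8, 64, HFS_ALLOC_FORCECONTIG,
+ *                               &startBlock, &numBlocks);
+ *
+ * With HFS_ALLOC_FORCECONTIG set, success means a single contiguous extent of
+ * at least 8 blocks; without it, fewer blocks than requested may be returned.
+ */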
+errno_t hfs_block_alloc(hfsmount_t *hfsmp,
+ HFSPlusExtentDescriptor *extent,
+ hfs_block_alloc_flags_t flags,
+ hfs_alloc_extra_args_t *ap)
+{
+ return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
+}
/*
-;_______________________________________________________________________
-;
-; Routine: ReleaseBitmapBlock
-;
-; Function: Relase a bitmap block.
-;
-; Inputs:
-; vcb
-; blockRef
-; dirty
-;_______________________________________________________________________
-*/
-static OSErr ReleaseBitmapBlock(
- ExtendedVCB *vcb,
- UInt32 blockRef,
- Boolean dirty)
+ ;________________________________________________________________________________
+ ;
+ ; Routine: hfs_block_alloc_int
+ ;
+ ; Function: Allocate space on a volume. If contiguous allocation is requested,
+ ;           at least the requested number of blocks will be allocated or an
+ ;           error will be returned. If contiguous allocation is not forced,
+ ;           the space will be allocated from the first sufficiently large
+ ;           free extent found at or after the requested starting allocation
+ ;           block. If there is not enough room there, an allocation of less
+ ;           than the requested size will be returned.
+ ;
+ ; If the requested starting block is 0 (for new file allocations),
+ ; the volume's allocation block pointer will be used as a starting
+ ; point.
+ ;
+ ; Input Arguments:
+ ; hfsmp - Pointer to the HFS mount structure.
+ ; extent - startBlock indicates the block to start
+ ; searching from and blockCount is the number of
+ ; blocks required. Depending on the flags used,
+ ; more or less blocks may be returned. The
+ ; allocated extent is returned via this
+ ; parameter.
+ ; flags - Flags to specify options like contiguous, use
+ ; metadata zone, skip free block check, etc.
+ ; ap - Additional arguments used depending on flags.
+ ; See hfs_alloc_extra_args_t and below.
+ ;
+ ; Output:
+ ; (result) - Error code, zero for successful allocation
+ ; extent - If successful, the allocated extent.
+ ;
+ ; Side effects:
+ ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
+ ;
+ ; HFS_ALLOC_TENTATIVE
+ ; Blocks will be reserved but not marked allocated. They can be
+ ; stolen if free space is limited. Tentative blocks can be used by
+ ;    passing HFS_ALLOC_USE_TENTATIVE and passing in the reservation.
+ ; @ap->reservation_out is used to store the reservation.
+ ;
+ ; HFS_ALLOC_USE_TENTATIVE
+ ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
+ ; @ap->reservation_in should be set to whatever @ap->reservation_out
+ ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative
+ ; reservation was stolen, a normal allocation will take place.
+ ;
+ ; HFS_ALLOC_LOCKED
+ ; Blocks will be reserved but not marked allocated. Unlike tentative
+ ; reservations they cannot be stolen. It is safe to write to these
+ ; blocks. @ap->reservation_out is used to store the reservation.
+ ;
+ ; HFS_ALLOC_COMMIT
+ ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
+ ; mark them allocated on disk. @ap->reservation_in is used.
+ ;
+ ; HFS_ALLOC_ROLL_BACK
+ ; Take blocks that were just recently deallocated and mark them
+ ; allocated. This is for roll back situations. Blocks got
+ ; deallocated and then something went wrong and we need to roll back
+ ; by marking the blocks allocated.
+ ;
+ ; HFS_ALLOC_FORCECONTIG
+ ; It will not return fewer than @min_blocks.
+ ;
+ ; HFS_ALLOC_TRY_HARD
+ ; We will perform an exhaustive search to try and find @max_blocks.
+ ; It will not return fewer than @min_blocks.
+ ;
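+ ;    Illustrative sequence (a sketch, not part of the original change; the
+ ;    exact layout of hfs_alloc_extra_args_t is assumed from the field names
+ ;    above): reserve blocks tentatively, then consume the reservation later:
+ ;
+ ;        struct rl_entry *resv = NULL;
+ ;        hfs_alloc_extra_args_t args = {
+ ;            .max_blocks = 64,
+ ;            .reservation_out = &resv,
+ ;        };
+ ;        HFSPlusExtentDescriptor ext = { .startBlock = 0, .blockCount = 8 };
+ ;        err = hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_TENTATIVE, &args);
+ ;
+ ;        // later: turn the tentative reservation into a real allocation
+ ;        args.reservation_in = &resv;
+ ;        err = hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_USE_TENTATIVE, &args);
+ ;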
+ ;________________________________________________________________________________
+ */
+OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
+ HFSPlusExtentDescriptor *extent,
+ hfs_block_alloc_flags_t flags,
+ hfs_alloc_extra_args_t *ap)
{
- struct buf *bp = (struct buf *)blockRef;
-
- if (blockRef == 0) {
- if (dirty)
- panic("ReleaseBitmapBlock: missing bp");
- return (0);
- }
+ u_int32_t freeBlocks;
+ OSErr err = 0;
+ Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
+ Boolean useMetaZone;
+ Boolean forceContiguous = false;
+ Boolean forceFlush;
- if (bp) {
- if (dirty) {
- // XXXdbg
- struct hfsmount *hfsmp = VCBTOHFS(vcb);
-
- if (hfsmp->jnl) {
- journal_modify_block_end(hfsmp->jnl, bp);
- } else {
- buf_bdwrite(bp);
- }
- } else {
- buf_brelse(bp);
- }
- }
+ uint32_t startingBlock = extent->startBlock;
+ uint32_t minBlocks = extent->blockCount;
+ uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;
- return (0);
-}
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
+ if (ISSET(flags, HFS_ALLOC_COMMIT)) {
+ extent->startBlock = (*ap->reservation_in)->rl_start;
+ extent->blockCount = rl_len(*ap->reservation_in);
+ goto mark_allocated;
+ }
-/*
-_______________________________________________________________________
+ if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
+ goto mark_allocated;
-Routine: BlockAllocateContig
+ freeBlocks = hfs_freeblks(hfsmp, 0);
-Function: Allocate a contiguous group of allocation blocks. The
- allocation is all-or-nothing. The caller guarantees that
- there are enough free blocks (though they may not be
- contiguous, in which case this call will fail).
+ if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
+ struct rl_entry *range = *ap->reservation_in;
-Inputs:
- vcb Pointer to volume where space is to be allocated
- startingBlock Preferred first block for allocation
- minBlocks Minimum number of contiguous blocks to allocate
- maxBlocks Maximum number of contiguous blocks to allocate
- useMetaZone
+ if (range && range->rl_start != -1) {
+ /*
+ * It's possible that we have a tentative reservation
+ * but there aren't enough free blocks due to loaned blocks
+ * or insufficient space in the backing store.
+ */
+ uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks);
-Outputs:
- actualStartBlock First block of range allocated, or 0 if error
- actualNumBlocks Number of blocks allocated, or 0 if error
-_______________________________________________________________________
-*/
-static OSErr BlockAllocateContig(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- UInt32 minBlocks,
- UInt32 maxBlocks,
- Boolean useMetaZone,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks)
-{
- OSErr err;
+ if (count >= minBlocks) {
+ extent->startBlock = range->rl_start;
+ extent->blockCount = count;
- //
- // Find a contiguous group of blocks at least minBlocks long.
- // Determine the number of contiguous blocks available (up
- // to maxBlocks).
- //
+ // Should we go straight to commit?
+ if (!ISSET(flags, HFS_ALLOC_LOCKED))
+ SET(flags, HFS_ALLOC_COMMIT);
+
+ goto mark_allocated;
+ }
+ }
- /*
- * NOTE: If the only contiguous free extent of at least minBlocks
- * crosses startingBlock (i.e. starts before, ends after), then we
- * won't find it. Earlier versions *did* find this case by letting
- * the second search look past startingBlock by minBlocks. But
- * with the free extent cache, this can lead to duplicate entries
- * in the cache, causing the same blocks to be allocated twice.
- */
- err = BlockFindContiguous(vcb, startingBlock, vcb->totalBlocks, minBlocks,
- maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
- if (err == dskFulErr && startingBlock != 0) {
/*
- * Constrain the endingBlock so we don't bother looking for ranges
- * that would overlap those found in the previous call.
+ * We can't use the tentative reservation so free it and allocate
+ * normally.
*/
- err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
- useMetaZone, actualStartBlock, actualNumBlocks);
+ hfs_free_tentative(hfsmp, ap->reservation_in);
+ CLR(flags, HFS_ALLOC_USE_TENTATIVE);
}
- if (err != noErr) goto Exit;
- // sanity check
- if ((*actualStartBlock + *actualNumBlocks) > vcb->totalBlocks)
- panic("BlockAllocateContig: allocation overflow on \"%s\"", vcb->vcbVN);
+ if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
+ forceContiguous = true;
- //
- // Now mark those blocks allocated.
- //
- err = BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks);
-
-Exit:
- if (err != noErr) {
- *actualStartBlock = 0;
- *actualNumBlocks = 0;
+ if (flags & HFS_ALLOC_METAZONE) {
+ useMetaZone = true;
+ } else {
+ useMetaZone = false;
}
-
- return err;
-}
-
-/*
-_______________________________________________________________________
-Routine: BlockAllocateAny
+ if (flags & HFS_ALLOC_FLUSHTXN) {
+ forceFlush = true;
+ }
+ else {
+ forceFlush = false;
+ }
-Function: Allocate one or more allocation blocks. If there are fewer
- free blocks than requested, all free blocks will be
- allocated. The caller guarantees that there is at least
- one free block.
+ assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);
+
+ // See if we have to steal tentative blocks
+ if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
+ SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);
+
+ /* Skip free block check if blocks are being allocated for relocating
+ * data during truncating a volume.
+ *
+ * During hfs_truncatefs(), the volume free block count is updated
+ * before relocating data to reflect the total number of free blocks
+ * that will exist on the volume after resize is successful. This
+ * means that we have reserved allocation blocks required for relocating
+ * the data and hence there is no need to check the free blocks.
+ * It will also prevent resize failure when the number of blocks in
+ * an extent being relocated is more than the free blocks that will
+ * exist after the volume is resized.
+ */
+ if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+ // If the disk is already full, don't bother.
+ if (freeBlocks == 0) {
+ err = dskFulErr;
+ goto exit;
+ }
+ if (forceContiguous && freeBlocks < minBlocks) {
+ err = dskFulErr;
+ goto exit;
+ }
-Inputs:
- vcb Pointer to volume where space is to be allocated
- startingBlock Preferred first block for allocation
- endingBlock Last block to check + 1
- maxBlocks Maximum number of contiguous blocks to allocate
- useMetaZone
+ /*
+ * Clip if necessary so we don't over-subscribe the free blocks.
+ */
+ if (minBlocks > freeBlocks) {
+ minBlocks = freeBlocks;
+ }
+ if (maxBlocks > freeBlocks) {
+ maxBlocks = freeBlocks;
+ }
+ }
-Outputs:
- actualStartBlock First block of range allocated, or 0 if error
- actualNumBlocks Number of blocks allocated, or 0 if error
-_______________________________________________________________________
-*/
-static OSErr BlockAllocateAny(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- register UInt32 endingBlock,
- UInt32 maxBlocks,
- Boolean useMetaZone,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks)
-{
- OSErr err;
- register UInt32 block; // current block number
- register UInt32 currentWord; // Pointer to current word within bitmap block
- register UInt32 bitMask; // Word with given bits already set (ready to OR in)
- register UInt32 wordsLeft; // Number of words left in this bitmap block
- UInt32 *buffer = NULL;
- UInt32 *currCache = NULL;
- UInt32 blockRef;
- UInt32 bitsPerBlock;
- UInt32 wordsPerBlock;
- Boolean dirty = false;
- struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+ err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
+ if (err)
+ goto exit;
- // Since this routine doesn't wrap around
- if (maxBlocks > (endingBlock - startingBlock)) {
- maxBlocks = endingBlock - startingBlock;
+ goto mark_allocated;
}
- /*
- * Skip over metadata blocks.
- */
- if (!useMetaZone)
- startingBlock = NextBitmapBlock(vcb, startingBlock);
-
//
- // Pre-read the first bitmap block
+ // If caller didn't specify a starting block number, then use the volume's
+ // next block to allocate from.
//
- err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
- if (err != noErr) goto Exit;
- buffer = currCache;
+ if (startingBlock == 0) {
+ hfs_lock_mount (hfsmp);
+
+ /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
+ if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+ startingBlock = hfsmp->sparseAllocation;
+ }
+ else {
+ startingBlock = hfsmp->nextAllocation;
+ }
+ hfs_unlock_mount(hfsmp);
+ updateAllocPtr = true;
+ }
- //
- // Set up the current position within the block
- //
- {
- UInt32 wordIndexInBlock;
-
- bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
- wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
- wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
- buffer += wordIndexInBlock;
- wordsLeft = wordsPerBlock - wordIndexInBlock;
- currentWord = SWAP_BE32 (*buffer);
- bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
+ if (startingBlock >= hfsmp->allocLimit) {
+ startingBlock = 0; /* overflow so start at beginning */
}
-
+
//
- // Find the first unallocated block
+ // If the request must be contiguous, then find a sequence of free blocks
+ // that is long enough. Otherwise, find the first free block.
//
- block=startingBlock;
- while (block < endingBlock) {
- if ((currentWord & bitMask) == 0)
- break;
+ if (forceContiguous) {
+ err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
+ flags, &extent->startBlock, &extent->blockCount);
+ /*
+ * If we allocated from a new position then also update the roving allocator.
+ * This will keep the roving allocation pointer up-to-date even
+ * if we are using the new R/B tree allocator, since
+			 * it doesn't matter to us here how the underlying allocator found
+ * the block to vend out.
+ */
+ if ((err == noErr) &&
+ (extent->startBlock > startingBlock) &&
+ ((extent->startBlock < hfsmp->hfs_metazone_start) ||
+ (extent->startBlock > hfsmp->hfs_metazone_end))) {
+ updateAllocPtr = true;
+ }
+ } else {
+ /*
+ * Scan the bitmap once, gather the N largest free extents, then
+ * allocate from these largest extents. Repeat as needed until
+ * we get all the space we needed. We could probably build up
+ * that list when the higher level caller tried (and failed) a
+ * contiguous allocation first.
+ *
+			 * Note that the free-extent cache will cease to be updated if
+ * we are using the red-black tree for allocations. If we jettison
+ * the tree, then we will reset the free-extent cache and start over.
+ */
- // Next bit
- ++block;
- bitMask >>= 1;
- if (bitMask == 0) {
- // Next word
- bitMask = kHighBitInWordMask;
- ++buffer;
+ /* Disable HFS_ALLOC_FLUSHTXN if needed */
+ if (forceFlush) {
+ flags &= ~HFS_ALLOC_FLUSHTXN;
+ }
- if (--wordsLeft == 0) {
- // Next block
- buffer = currCache = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, false);
- if (err != noErr) goto Exit;
+ /*
+ * BlockFindKnown only examines the free extent cache; anything in there will
+ * have been committed to stable storage already.
+ */
+ err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
+ &extent->blockCount);
+
+ /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */
+
+ if (err == dskFulErr) {
+ /*
+ * Now we have to do a bigger scan. Start at startingBlock and go up until the
+				 * allocation limit. We 'trust' the summary bitmap in this call if it tells us
+ * that it could not find any free space.
+ */
+ err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
+ maxBlocks, flags, true,
+ &extent->startBlock, &extent->blockCount);
+ }
+ if (err == dskFulErr) {
+ /*
+ * Vary the behavior here if the summary table is on or off.
+				 * If it is on, then we don't trust it if we get into this case and
+ * basically do a full scan for maximum coverage.
+ * If it is off, then we trust the above and go up until the startingBlock.
+ */
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
+ flags, false,
+ &extent->startBlock, &extent->blockCount);
+ }
+ else {
+ err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
+ flags, false,
+ &extent->startBlock, &extent->blockCount);
+ }
+
+ /*
+ * Last Resort: Find/use blocks that may require a journal flush.
+ */
+ if (err == dskFulErr && forceFlush) {
+ flags |= HFS_ALLOC_FLUSHTXN;
+ err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
+ flags, false,
+ &extent->startBlock, &extent->blockCount);
+ }
+ }
+ }
- /*
- * Skip over metadata blocks.
- */
- if (!useMetaZone) {
- block = NextBitmapBlock(vcb, block);
- if (block >= endingBlock) {
- err = dskFulErr;
- goto Exit;
- }
- }
- err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
- if (err != noErr) goto Exit;
- buffer = currCache;
+ if (err)
+ goto exit;
- wordsLeft = wordsPerBlock;
- }
- currentWord = SWAP_BE32 (*buffer);
- }
- }
+mark_allocated:
- // Did we get to the end of the bitmap before finding a free block?
- // If so, then couldn't allocate anything.
- if (block >= endingBlock) {
- err = dskFulErr;
- goto Exit;
- }
+ // Handle alignment
+ if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
+ /*
+ * See the comment in FileMgrInternal.h for alignment
+ * semantics.
+ */
+ uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
+ % ap->alignment);
- // Return the first block in the allocated range
- *actualStartBlock = block;
- dirty = true;
-
- // If we could get the desired number of blocks before hitting endingBlock,
- // then adjust endingBlock so we won't keep looking. Ideally, the comparison
- // would be (block + maxBlocks) < endingBlock, but that could overflow. The
- // comparison below yields identical results, but without overflow.
- if (block < (endingBlock-maxBlocks)) {
- endingBlock = block + maxBlocks; // if we get this far, we've found enough
+ // @minBlocks is still the minimum
+ if (extent->blockCount >= minBlocks + rounding)
+ extent->blockCount -= rounding;
}
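+	/*
+	 * Worked example of the trim above (illustrative, hypothetical
+	 * values): with ap->alignment == 8, ap->alignment_offset == 3 and
+	 * extent->blockCount == 18, rounding == (18 + 3) % 8 == 5. Provided
+	 * 18 >= minBlocks + 5, we trim to 13 blocks so that (13 + 3) is a
+	 * multiple of 8, i.e. the extent plus its offset ends on an
+	 * alignment boundary.
+	 */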
-
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+
+ err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
+ extent->blockCount, flags);
+
+ if (err)
+ goto exit;
+
+ if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0
+ && !ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+ if (ISSET(flags, HFS_ALLOC_FAST_DEV)) {
+#if !HFS_ALLOC_TEST /* guard needed: under HFS_ALLOC_TEST this file is compiled outside of the kernel */
+ hfs_pin_block_range(hfsmp, HFS_PIN_IT,
+ extent->startBlock, extent->blockCount,
+ vfs_context_kernel());
+#endif
+ } else {
+ _dk_cs_map_t cm = {
+ .cm_extent = {
+ (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize)
+ + hfsmp->hfsPlusIOPosOffset),
+ hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize)
+ }
+ };
+
+ errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP,
+ (caddr_t)&cm, 0, vfs_context_current());
+
+ /*
+ * Ignore errors for now; we are fully provisioned so in
+ * theory CoreStorage should be able to handle this
+			 * allocation. Should we want to change this in the future, then
+			 * we should think carefully about how we handle errors. Allowing
+ * CoreStorage to truncate our allocation is problematic
+ * because we might have minimum and alignment requirements
+ * and backing out changes we have already made is
+ * non-trivial.
+ */
+
+ if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) {
+ printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n",
+ err2, cm.cm_bytes_mapped);
+ }
+ }
}
+ // if we actually allocated something then go update the
+ // various bits of state that we maintain regardless of
+ // whether there was an error (i.e. partial allocations
+ // still need to update things like the free block count).
//
- // Allocate all of the consecutive blocks
- //
- while ((currentWord & bitMask) == 0) {
- // Allocate this block
- currentWord |= bitMask;
-
- // Move to the next block. If no more, then exit.
- ++block;
- if (block == endingBlock)
- break;
+ if (extent->blockCount != 0) {
+ //
+ // If we used the volume's roving allocation pointer, then we need to update it.
+ // Adding in the length of the current allocation might reduce the next allocate
+ // call by avoiding a re-scan of the already allocated space. However, the clump
+ // just allocated can quite conceivably end up being truncated or released when
+ // the file is closed or its EOF changed. Leaving the allocation pointer at the
+ // start of the last allocation will avoid unnecessary fragmentation in this case.
+ //
+ hfs_lock_mount (hfsmp);
- // Next bit
- bitMask >>= 1;
- if (bitMask == 0) {
- *buffer = SWAP_BE32 (currentWord); // update value in bitmap
-
- // Next word
- bitMask = kHighBitInWordMask;
- ++buffer;
-
- if (--wordsLeft == 0) {
- // Next block
- buffer = currCache = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, true);
- if (err != noErr) goto Exit;
+ if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
+ lck_spin_lock(&hfsmp->vcbFreeExtLock);
+ if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
+ hfsmp->sparseAllocation = extent->startBlock;
+ }
+ lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+ if (extent->blockCount < hfsmp->hfs_freed_block_count) {
+ hfsmp->hfs_freed_block_count -= extent->blockCount;
+ } else {
+ hfsmp->hfs_freed_block_count = 0;
+ }
- /*
- * Skip over metadata blocks.
- */
- if (!useMetaZone) {
- UInt32 nextBlock;
+ if (updateAllocPtr &&
+ ((extent->startBlock < hfsmp->hfs_metazone_start) ||
+ (extent->startBlock > hfsmp->hfs_metazone_end))) {
+ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
+ }
- nextBlock = NextBitmapBlock(vcb, block);
- if (nextBlock != block) {
- goto Exit; /* allocation gap, so stop */
- }
- }
+ (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
+ }
- err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
- if (err != noErr) goto Exit;
- buffer = currCache;
+ if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
+ (*ap->reservation_in)->rl_start += extent->blockCount;
+ hfsmp->tentativeBlocks -= extent->blockCount;
+ if (rl_len(*ap->reservation_in) <= 0)
+ hfs_free_tentative(hfsmp, ap->reservation_in);
+ } else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
+ // Handle committing locked extents
+ assert(hfsmp->lockedBlocks >= extent->blockCount);
+ (*ap->reservation_in)->rl_start += extent->blockCount;
+ hfsmp->lockedBlocks -= extent->blockCount;
+ hfs_free_locked(hfsmp, ap->reservation_in);
+ }
+
+ /*
+ * Update the number of free blocks on the volume
+ *
+		 * Skip updating the free block count if the blocks are
+ * being allocated to relocate data as part of hfs_truncatefs()
+ */
+
+ if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+ hfsmp->tentativeBlocks += extent->blockCount;
+ } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
+ hfsmp->lockedBlocks += extent->blockCount;
+ } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+ hfsmp->freeBlocks -= extent->blockCount;
+ }
+ MarkVCBDirty(hfsmp);
+ hfs_unlock_mount(hfsmp);
+
+ hfs_generate_volume_notifications(hfsmp);
+
+ if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+ add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
+ 0, ap->reservation_out);
+ } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
+ add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
+ 1, ap->reservation_out);
+ }
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
+ /*
+ * See if we used tentative blocks. Note that we cannot
+ * free the reservations here because we don't have access
+ * to the external pointers. All we can do is update the
+ * reservations and they'll be cleaned up when whatever is
+ * holding the pointers calls us back.
+ *
+ * We use the rangelist code to detect overlaps and
+ * constrain the tentative block allocation. Note that
+ * @end is inclusive so that our rangelist code will
+ * resolve the various cases for us. As a result, we need
+ * to ensure that we account for it properly when removing
+ * the blocks from the tentative count in the mount point
+ * and re-inserting the remainder (either head or tail)
+ */
+ struct rl_entry *range, *next_range;
+ struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
+ const uint32_t start = extent->startBlock;
+ const uint32_t end = start + extent->blockCount - 1;
+ TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
+ switch (rl_overlap(range, start, end)) {
+ case RL_OVERLAPCONTAINSRANGE:
+ // Keep the bigger part
+ if (start - range->rl_start > range->rl_end - end) {
+ // Discard the tail
+ hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
+ hfs_release_summary(hfsmp, end + 1, range->rl_end - end);
+ const uint32_t old_end = range->rl_end;
+ range->rl_end = start - 1;
+ add_free_extent_cache(hfsmp, end + 1, old_end - end);
+ } else {
+ // Discard the head
+ hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
+ hfs_release_summary(hfsmp, range->rl_start,
+ start - range->rl_start);
+ const uint32_t old_start = range->rl_start;
+ range->rl_start = end + 1;
+ add_free_extent_cache(hfsmp, old_start,
+ start - old_start);
+ }
+ assert(range->rl_end >= range->rl_start);
+ break;
+ case RL_MATCHINGOVERLAP:
+ case RL_OVERLAPISCONTAINED:
+ hfsmp->tentativeBlocks -= rl_len(range);
+ range->rl_end = range->rl_start - 1;
+ hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+ break;
+ case RL_OVERLAPSTARTSBEFORE:
+ hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
+ range->rl_end = start - 1;
+ assert(range->rl_end >= range->rl_start);
+ break;
+ case RL_OVERLAPENDSAFTER:
+ hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
+ range->rl_start = end + 1;
+ assert(range->rl_end >= range->rl_start);
+ break;
+ case RL_NOOVERLAP:
+ break;
}
-
- wordsLeft = wordsPerBlock;
}
-
- currentWord = SWAP_BE32 (*buffer);
}
}
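+	/*
+	 * Worked example of the trimming above (illustrative, hypothetical
+	 * values): a tentative reservation covers blocks [100, 199] and we
+	 * just allocated [120, 149] from the bitmap (RL_OVERLAPCONTAINSRANGE).
+	 * The head [100, 119] (20 blocks) is smaller than the tail
+	 * [150, 199] (50 blocks), so the head is discarded: tentativeBlocks
+	 * drops by 50 (blocks 100..149 are no longer reserved), the
+	 * reservation shrinks to [150, 199], and only the still-free head
+	 * [100, 119] is returned to the free extent cache.
+	 */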
- *buffer = SWAP_BE32 (currentWord); // update the last change
-Exit:
- if (err == noErr) {
- *actualNumBlocks = block - *actualStartBlock;
+exit:
- // sanity check
- if ((*actualStartBlock + *actualNumBlocks) > vcb->totalBlocks)
- panic("BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+ if (ALLOC_DEBUG) {
+ if (err == noErr) {
+ if (extent->startBlock >= hfsmp->totalBlocks) {
+ panic ("BlockAllocate: vending invalid blocks!");
+ }
+ if (extent->startBlock >= hfsmp->allocLimit) {
+ panic ("BlockAllocate: vending block past allocLimit!");
+ }
+
+			if ((extent->startBlock + extent->blockCount) > hfsmp->totalBlocks) {
+ panic ("BlockAllocate: vending too many invalid blocks!");
+ }
+
+			if ((extent->startBlock + extent->blockCount) > hfsmp->allocLimit) {
+ panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
+ }
+ }
}
- else {
- *actualStartBlock = 0;
- *actualNumBlocks = 0;
+
+ if (err) {
+ // Just to be safe...
+ extent->startBlock = 0;
+ extent->blockCount = 0;
}
-
- if (currCache)
- (void) ReleaseBitmapBlock(vcb, blockRef, dirty);
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0);
return err;
}
/*
-_______________________________________________________________________
-
-Routine: BlockAllocateKnown
-
-Function: Try to allocate space from known free space in the free
- extent cache.
-
-Inputs:
- vcb Pointer to volume where space is to be allocated
- maxBlocks Maximum number of contiguous blocks to allocate
-
-Outputs:
- actualStartBlock First block of range allocated, or 0 if error
- actualNumBlocks Number of blocks allocated, or 0 if error
-
-Returns:
- dskFulErr Free extent cache is empty
-_______________________________________________________________________
+;________________________________________________________________________________
+;
+; Routine: BlockDeallocate
+;
+; Function: Update the bitmap to deallocate a run of disk allocation blocks
+;
+; Input Arguments:
+; vcb - Pointer to ExtendedVCB for the volume to free space on
+; firstBlock - First allocation block to be freed
+; numBlocks - Number of allocation blocks to free up (must be > 0!)
+;
+; Output:
+; (result) - Result code
+;
+; Side effects:
+; The volume bitmap is read and updated; the volume bitmap cache may be changed.
+; The Allocator's red-black trees may also be modified as a result.
+;
+;________________________________________________________________________________
*/
-static OSErr BlockAllocateKnown(
- ExtendedVCB *vcb,
- UInt32 maxBlocks,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks)
+
+OSErr BlockDeallocate (
+ ExtendedVCB *vcb, // Which volume to deallocate space on
+ u_int32_t firstBlock, // First block in range to deallocate
+ u_int32_t numBlocks, // Number of contiguous blocks to deallocate
+ hfs_block_alloc_flags_t flags)
{
- UInt32 i;
- UInt32 foundBlocks;
- UInt32 newStartBlock, newBlockCount;
-
- if (vcb->vcbFreeExtCnt == 0)
- return dskFulErr;
+ if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
+ return 0;
- // Just grab up to maxBlocks of the first (largest) free exent.
- *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
- foundBlocks = vcb->vcbFreeExt[0].blockCount;
- if (foundBlocks > maxBlocks)
- foundBlocks = maxBlocks;
- *actualNumBlocks = foundBlocks;
-
- // Adjust the start and length of that extent.
- newStartBlock = vcb->vcbFreeExt[0].startBlock + foundBlocks;
- newBlockCount = vcb->vcbFreeExt[0].blockCount - foundBlocks;
-
- // The first extent might not be the largest anymore. Bubble up any
- // (now larger) extents to the top of the list.
- for (i=1; i<vcb->vcbFreeExtCnt; ++i)
- {
- if (vcb->vcbFreeExt[i].blockCount > newBlockCount)
- {
- vcb->vcbFreeExt[i-1].startBlock = vcb->vcbFreeExt[i].startBlock;
- vcb->vcbFreeExt[i-1].blockCount = vcb->vcbFreeExt[i].blockCount;
- }
- else
- {
- break;
- }
- }
-
- // If this is now the smallest known free extent, then it might be smaller than
- // other extents we didn't keep track of. So, just forget about this extent.
- // After the previous loop, (i-1) is the index of the extent we just allocated from.
- if (i == vcb->vcbFreeExtCnt)
- {
- // It's now the smallest extent, so forget about it
- --vcb->vcbFreeExtCnt;
- }
- else
- {
- // It's not the smallest, so store it in its proper place
- vcb->vcbFreeExt[i-1].startBlock = newStartBlock;
- vcb->vcbFreeExt[i-1].blockCount = newBlockCount;
- }
-
- // sanity check
- if ((*actualStartBlock + *actualNumBlocks) > vcb->totalBlocks)
- panic("BlockAllocateKnown: allocation overflow on \"%s\"", vcb->vcbVN);
+ OSErr err;
+ struct hfsmount *hfsmp;
+ hfsmp = VCBTOHFS(vcb);
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
//
- // Now mark the found extent in the bitmap
+ // If no blocks to deallocate, then exit early
//
- return BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks);
-}
+ if (numBlocks == 0) {
+ err = noErr;
+ goto Exit;
+ }
+ if (ALLOC_DEBUG) {
+ if (firstBlock >= hfsmp->totalBlocks) {
+ panic ("BlockDeallocate: freeing invalid blocks!");
+ }
-/*
-_______________________________________________________________________
+		if ((firstBlock + numBlocks) > hfsmp->totalBlocks) {
+ panic ("BlockDeallocate: freeing too many invalid blocks!");
+ }
+ }
-Routine: BlockMarkAllocated
+ /*
+ * If we're using the summary bitmap, then try to mark the bits
+ * as potentially usable/free before actually deallocating them.
+	 * It is better to be slightly speculative here: a stale "free" hint
+	 * in the summary costs at most a wasted scan, whereas a stale
+	 * "full" hint could hide genuinely free space from the allocator.
+ */
-Function: Mark a contiguous group of blocks as allocated (set in the
- bitmap). It assumes those bits are currently marked
- deallocated (clear in the bitmap).
+ (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
-Inputs:
- vcb Pointer to volume where space is to be allocated
- startingBlock First block number to mark as allocated
- numBlocks Number of blocks to mark as allocated
-_______________________________________________________________________
-*/
-__private_extern__
-OSErr BlockMarkAllocated(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- register UInt32 numBlocks)
-{
- OSErr err;
- register UInt32 *currentWord; // Pointer to current word within bitmap block
- register UInt32 wordsLeft; // Number of words left in this bitmap block
- register UInt32 bitMask; // Word with given bits already set (ready to OR in)
- UInt32 firstBit; // Bit index within word of first bit to allocate
- UInt32 numBits; // Number of bits in word to allocate
- UInt32 *buffer = NULL;
- UInt32 blockRef;
- UInt32 bitsPerBlock;
- UInt32 wordsPerBlock;
- // XXXdbg
- struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
+ if (err) {
+ goto Exit;
+ }
//
- // Pre-read the bitmap block containing the first word of allocation
+ // Update the volume's free block count, and mark the VCB as dirty.
//
+ hfs_lock_mount(hfsmp);
+ /*
+	 * Do not update the free block count if HFS_ALLOC_SKIPFREEBLKS is
+	 * set; this flag is specified while a volume is being truncated.
+ */
+ if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+ vcb->freeBlocks += numBlocks;
+ }
- err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
- if (err != noErr) goto Exit;
- //
- // Initialize currentWord, and wordsLeft.
- //
- {
- UInt32 wordIndexInBlock;
-
- bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
- wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+ vcb->hfs_freed_block_count += numBlocks;
- wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
- currentWord = buffer + wordIndexInBlock;
- wordsLeft = wordsPerBlock - wordIndexInBlock;
- }
-
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ if (vcb->nextAllocation == (firstBlock + numBlocks)) {
+ HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
}
- //
- // If the first block to allocate doesn't start on a word
- // boundary in the bitmap, then treat that first word
- // specially.
- //
+ if (hfsmp->jnl == NULL) {
+ /*
+ * In the journal case, we'll add the free extent once the journal
+ * calls us back to tell us it wrote the transaction to disk.
+ */
+ (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
- firstBit = startingBlock % kBitsPerWord;
- if (firstBit != 0) {
- bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
- numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
- if (numBits > numBlocks) {
- numBits = numBlocks; // entire allocation is inside this one word
- bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
- }
-#if DEBUG_BUILD
- if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
- panic("BlockMarkAllocated: blocks already allocated!");
+ /*
+		 * In the journal case, we'll only update sparseAllocation once the
+ * free extent cache becomes empty (when we remove the last entry
+ * from the cache). Skipping it here means we're less likely to
+ * find a recently freed extent via the bitmap before it gets added
+ * to the free extent cache.
+ */
+ if (firstBlock < vcb->sparseAllocation) {
+ vcb->sparseAllocation = firstBlock;
}
-#endif
- *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
- numBlocks -= numBits; // adjust number of blocks left to allocate
-
- ++currentWord; // move to next word
- --wordsLeft; // one less word left in this block
}
- //
- // Allocate whole words (32 blocks) at a time.
- //
+ MarkVCBDirty(vcb);
+ hfs_unlock_mount(hfsmp);
- bitMask = kAllBitsSetInWord; // put this in a register for 68K
- while (numBlocks >= kBitsPerWord) {
- if (wordsLeft == 0) {
+ hfs_generate_volume_notifications(VCBTOHFS(vcb));
+Exit:
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+ return err;
+}
+
+
+u_int8_t freebitcount[16] = {
+ 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
+ 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
+};
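+/*
+ * Illustration (not part of the original change): freebitcount maps a
+ * 4-bit nibble to the number of zero (free) bits it contains. For the
+ * bitmap byte 0xA5, the low nibble 0x5 (0101) and the high nibble 0xA
+ * (1010) each contain two zero bits, so the byte contributes
+ * freebitcount[0x5] + freebitcount[0xA] == 4 free allocation blocks,
+ * exactly as summed in MetaZoneFreeBlocks() below.
+ */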
+
+u_int32_t
+MetaZoneFreeBlocks(ExtendedVCB *vcb)
+{
+ u_int32_t freeblocks;
+ u_int32_t *currCache;
+ uintptr_t blockRef;
+ u_int32_t bit;
+ u_int32_t lastbit;
+ int bytesleft;
+ int bytesperblock;
+ u_int8_t byte;
+ u_int8_t *buffer;
+
+ blockRef = 0;
+ bytesleft = freeblocks = 0;
+ buffer = NULL;
+ bit = VCBTOHFS(vcb)->hfs_metazone_start;
+ if (bit == 1)
+ bit = 0;
+
+ lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
+ bytesperblock = vcb->vcbVBMIOSize;
+
+ /*
+	 * Count all the free bits from bit to lastbit.
+ */
+ while (bit < lastbit) {
+ /*
+ * Get next bitmap block.
+ */
+ if (bytesleft == 0) {
+ if (blockRef) {
+ (void) ReleaseBitmapBlock(vcb, blockRef, false);
+ blockRef = 0;
+ }
+ if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef,
+ HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
+ return (0);
+ }
+ buffer = (u_int8_t *)currCache;
+ bytesleft = bytesperblock;
+ }
+ byte = *buffer++;
+ freeblocks += freebitcount[byte & 0x0F];
+ freeblocks += freebitcount[(byte >> 4) & 0x0F];
+ bit += kBitsPerByte;
+ --bytesleft;
+ }
+ if (blockRef)
+ (void) ReleaseBitmapBlock(vcb, blockRef, false);
+
+ return (freeblocks);
+}
+
+
+/*
+ * Obtain the next allocation block (bit) that's
+ * outside the metadata allocation zone.
+ */
+static u_int32_t NextBitmapBlock(
+ ExtendedVCB *vcb,
+ u_int32_t bit)
+{
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+ if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
+ return (bit);
+ /*
+ * Skip over metadata allocation zone.
+ */
+ if ((bit >= hfsmp->hfs_metazone_start) &&
+ (bit <= hfsmp->hfs_metazone_end)) {
+ bit = hfsmp->hfs_metazone_end + 1;
+ }
+ return (bit);
+}
+
+
+// Assumes @bitmap is 8-byte aligned and its size is a multiple of 8 bytes.
+static void bits_set(void *bitmap, int start, int end)
+{
+ const int start_bit = start & 63;
+ const int end_bit = end & 63;
+
+#define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
+#define RIGHT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)
+
+ uint64_t *p = (uint64_t *)bitmap + start / 64;
+
+ if ((start & ~63) == (end & ~63)) {
+ // Start and end in same 64 bits
+ *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
+ } else {
+ *p++ |= RIGHT_MASK(start_bit);
+
+ int nquads = (end - end_bit - start - 1) / 64;
+
+ while (nquads--)
+ *p++ = 0xffffffffffffffffull;
+
+ if (end_bit)
+ *p |= LEFT_MASK(end_bit);
+ }
+}
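+/*
+ * A minimal sketch of bits_set() in action (illustrative only; the
+ * helper below is hypothetical and guarded out of the build):
+ */
+#if 0
+static void bits_set_example(void)
+{
+	uint64_t bitmap[2] = { 0, 0 };	// 8-byte aligned, as bits_set assumes
+
+	// Mark bits 3..9 as set; the end argument is exclusive.
+	bits_set(bitmap, 3, 10);
+
+	// Both indices fall in the same 64-bit quad, so the single-word
+	// path runs:
+	//   RIGHT_MASK(3) keeps bits 3..63 (host order 0x1fffffffffffffff)
+	//   LEFT_MASK(10) keeps bits 0..9  (host order 0xffc0000000000000)
+	// Their intersection sets exactly bits 3..9, byte-swapped so that
+	// bit 0 is the MSB of the first byte, matching the on-disk,
+	// big-endian bitmap layout.
+}
+#endif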
+
+// Modifies the buffer and applies any reservations that we might have
+static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset,
+ hfs_block_alloc_flags_t flags,
+ bool always_copy)
+{
+ bool taken_copy = false;
+ void *buffer = (void *)buf_dataptr(bp);
+ const uint32_t nbytes = buf_count(bp);
+ const off_t end = offset + nbytes * 8 - 1;
+
+ for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
+ ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
+ struct rl_entry *entry;
+ TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+ uint32_t a, b;
+
+ enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);
+
+ if (overlap_type == RL_NOOVERLAP)
+ continue;
+
+ /*
+ * If always_copy is false, we only take a copy if B_LOCKED is
+ * set because ReleaseScanBitmapRange doesn't invalidate the
+ * buffer in that case.
+ */
+ if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
+ buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
+ buf_brelse(bp);
+ bp = new_bp;
+ buf_setflags(bp, B_NOCACHE);
+ buffer = (void *)buf_dataptr(bp);
+ taken_copy = true;
+ }
+
+ switch (overlap_type) {
+ case RL_OVERLAPCONTAINSRANGE:
+ case RL_MATCHINGOVERLAP:
+ memset(buffer, 0xff, nbytes);
+ return bp;
+ case RL_OVERLAPISCONTAINED:
+ a = entry->rl_start;
+ b = entry->rl_end;
+ break;
+ case RL_OVERLAPSTARTSBEFORE:
+ a = offset;
+ b = entry->rl_end;
+ break;
+ case RL_OVERLAPENDSAFTER:
+ a = entry->rl_start;
+ b = end;
+ break;
+ case RL_NOOVERLAP:
+ __builtin_unreachable();
+ }
+
+ a -= offset;
+ b -= offset;
+
+ assert(a < buf_count(bp) * 8);
+ assert(b < buf_count(bp) * 8);
+ assert(b >= a);
+
+ // b is inclusive
+ bits_set(buffer, a, b + 1);
+ }
+	} // end of loop over the reserved-range lists
+
+ return bp;
+}
+
+/*
+;_______________________________________________________________________
+;
+; Routine: ReadBitmapBlock
+;
+; Function: Read in a bitmap block corresponding to a given allocation
+; block (bit). Return a pointer to the bitmap block.
+;
+; Inputs:
+; vcb -- Pointer to ExtendedVCB
+; bit -- Allocation block whose bitmap block is desired
+;
+; Outputs:
+;			buffer		-- Pointer to bitmap block corresponding to "block"
+; blockRef
+;_______________________________________________________________________
+*/
+static OSErr ReadBitmapBlock(ExtendedVCB *vcb,
+ u_int32_t bit,
+ u_int32_t **buffer,
+ uintptr_t *blockRef,
+ hfs_block_alloc_flags_t flags)
+{
+ OSErr err;
+ struct buf *bp = NULL;
+ struct vnode *vp = NULL;
+ daddr64_t block;
+ u_int32_t blockSize;
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
+
+ /*
+ * volume bitmap blocks are protected by the allocation file lock
+ */
+ REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+
+ blockSize = (u_int32_t)vcb->vcbVBMIOSize;
+ block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
+
+ /* HFS+ / HFSX */
+ if (vcb->vcbSigWord != kHFSSigWord) {
+ vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
+ }
+#if CONFIG_HFS_STD
+ else {
+ /* HFS Standard */
+ vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */
+ block += vcb->vcbVBMSt; /* map to physical block */
+ }
+#endif
+
+ err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
+
+ if (bp) {
+ if (err) {
+ buf_brelse(bp);
+ *blockRef = 0;
+ *buffer = NULL;
+ } else {
+ if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
+ bp = process_reservations(vcb, bp, block * blockSize * 8,
+ flags, /* always_copy: */ true);
+ }
+
+ buf_setfsprivate(bp, (void *)(uintptr_t)flags);
+
+ *blockRef = (uintptr_t)bp;
+ *buffer = (u_int32_t *)buf_dataptr(bp);
+ }
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+ return err;
+}
+
+
+/*
+;_______________________________________________________________________
+;
+; Routine: ReadBitmapRange
+;
+; Function: Read in a range of the bitmap starting at the given offset.
+; Use the supplied size to determine the amount of I/O to generate
+; against the bitmap file. Return a pointer to the bitmap block.
+;
+; Inputs:
+; hfsmp -- Pointer to hfs mount
+; offset -- byte offset into the bitmap file
+; size -- How much I/O to generate against the bitmap file.
+;
+; Outputs:
+;			buffer		-- Pointer to bitmap block data corresponding to "block"
+; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call.
+;_______________________________________________________________________
+*/
+static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
+ uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
+{
+
+ OSErr err;
+ struct buf *bp = NULL;
+ struct vnode *vp = NULL;
+ daddr64_t block;
+
+ /* This function isn't supported for HFS standard */
+ if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
+ return EINVAL;
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
+ }
+
+ /*
+ * volume bitmap blocks are protected by the allocation file lock
+ */
+	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+
+ vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */
+
+ /*
+ * The byte offset argument must be converted into bitmap-relative logical
+ * block numbers before using it in buf_meta_bread.
+ *
+ * buf_meta_bread (and the things it calls) will eventually try to
+ * reconstruct the byte offset into the file by multiplying the logical
+ * block number passed in below by the vcbVBMIOSize field in the mount
+ * point. So we prepare for that by converting the byte offset back into
+ * logical blocks in terms of VBMIOSize units.
+ *
+	 * The amount of I/O requested and the byte offset should have been computed
+	 * by the helper function in the frame that called us, so we can
+ * get away with just doing a simple divide here.
+ */
+ block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
+
+ err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
+
+ if (bp) {
+ if (err) {
+ buf_brelse(bp);
+ *blockRef = 0;
+ *buffer = NULL;
+ } else {
+ bp = process_reservations(hfsmp, bp, (offset * 8), 0,
+ /* always_copy: */ false);
+
+ *blockRef = bp;
+ *buffer = (u_int32_t *)buf_dataptr(bp);
+ }
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
+ }
+
+ return err;
+}
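+/*
+ * Illustrative arithmetic for the conversion above (hypothetical
+ * values): with vcbVBMIOSize == 4096, a byte offset of 65536 into the
+ * bitmap file maps to logical block 65536 / 4096 == 16, and
+ * buf_meta_bread reconstructs the byte offset as 16 * 4096 == 65536.
+ * The divide is only exact because, as noted above, callers compute
+ * offsets that are already aligned to vcbVBMIOSize.
+ */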
+
+
+/*
+;_______________________________________________________________________
+;
+; Routine: ReleaseBitmapBlock
+;
+;	Function:	Release a bitmap block.
+;
+; Inputs:
+; vcb
+; blockRef
+; dirty
+;_______________________________________________________________________
+*/
+static OSErr ReleaseBitmapBlock(
+ ExtendedVCB *vcb,
+ uintptr_t blockRef,
+ Boolean dirty)
+{
+ struct buf *bp = (struct buf *)blockRef;
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
+
+ if (blockRef == 0) {
+ if (dirty)
+ panic("hfs: ReleaseBitmapBlock: missing bp");
+ return (0);
+ }
+
+ if (bp) {
+ if (dirty) {
+ hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp);
+
+ if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
+ panic("Modified read-only bitmap buffer!");
+
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+ if (hfsmp->jnl) {
+ journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
+ } else {
+ buf_bdwrite(bp);
+ }
+ } else {
+ buf_brelse(bp);
+ }
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+ return (0);
+}
+
+/*
+ * ReleaseScanBitmapRange
+ *
+ * This is used to release struct bufs that were created for use by
+ * bitmap scanning code. Because they may be sized differently than the buffers
+ * used by the typical runtime manipulation code, we want to force them to be purged out
+ * of the buffer cache ASAP, so we'll release them differently than in the
+ * ReleaseBitmapBlock case.
+ *
+ * Additionally, because we know that we're only reading the blocks and that they
+ * should have been clean prior to reading them, we will never
+ * issue a write to them (thus dirtying them).
+ */
+
+static OSErr ReleaseScanBitmapRange(struct buf *bp) {
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_START, 0, 0, 0, 0, 0);
+ }
+
+ if (bp) {
+ /* Mark the buffer invalid if it isn't locked, then release it */
+ if ((buf_flags(bp) & B_LOCKED) == 0) {
+ buf_markinvalid(bp);
+ }
+ buf_brelse(bp);
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+ KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
+ }
+
+ return (0);
+}
+
+/*
+ * @extent.startBlock, on input, contains a preferred block for the
+ * allocation. @extent.blockCount, on input, contains the minimum
+ * number of blocks acceptable. Upon success, the result is conveyed
+ * in @extent.
+ */
+static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
+ HFSPlusExtentDescriptor *extent,
+ uint32_t max_blocks,
+ hfs_block_alloc_flags_t flags)
+{
+ OSErr err = dskFulErr;
+
+ const uint32_t min_blocks = extent->blockCount;
+
+ // It's > rather than >= because the last block is always reserved
+ if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
+ && hfsmp->allocLimit - extent->startBlock > max_blocks) {
+ /*
+ * This is just checking to see if there's an extent starting
+ * at extent->startBlock that will suit. We only check for
+ * @max_blocks here; @min_blocks is ignored.
+ */
+
+ err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
+ max_blocks, max_blocks, true, true,
+ &extent->startBlock, &extent->blockCount, flags);
+
+ if (err != dskFulErr)
+ return err;
+ }
+
+ err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
+ &extent->blockCount);
+
+ if (!err) {
+ if (extent->blockCount >= max_blocks)
+ return 0;
+ } else if (err != dskFulErr)
+ return err;
+
+ // Try a more exhaustive search
+ return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
+ min_blocks, max_blocks,
+ /* useMetaZone: */ true,
+ /* trustSummary: */ true,
+ &extent->startBlock, &extent->blockCount, flags);
+}
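+/*
+ * A minimal sketch of a caller (hypothetical, guarded out of the
+ * build): prime @extent with the preferred start block and the minimum
+ * count, and pass the maximum separately.
+ */
+#if 0
+static OSErr hfs_alloc_try_hard_example(hfsmount_t *hfsmp)
+{
+	// Ask for at least 8 and at most 256 blocks, preferably near block 1000.
+	HFSPlusExtentDescriptor extent = { .startBlock = 1000, .blockCount = 8 };
+
+	OSErr err = hfs_alloc_try_hard(hfsmp, &extent, 256, 0);
+
+	// On success, @extent describes the extent actually found.
+	return err;
+}
+#endif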
+
+/*
+_______________________________________________________________________
+
+Routine: BlockFindContig
+
+Function: Find a contiguous group of allocation blocks. If the
+ minimum cannot be satisfied, nothing is returned. The
+ caller guarantees that there are enough free blocks
+ (though they may not be contiguous, in which case this
+ call will fail).
+
+Inputs:
+ vcb Pointer to volume where space is to be allocated
+ startingBlock Preferred first block for allocation
+ minBlocks Minimum number of contiguous blocks to allocate
+ maxBlocks Maximum number of contiguous blocks to allocate
+ flags
+
+Outputs:
+ actualStartBlock First block of range allocated, or 0 if error
+ actualNumBlocks Number of blocks allocated, or 0 if error
+_______________________________________________________________________
+*/
+static OSErr BlockFindContig(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t minBlocks,
+ u_int32_t maxBlocks,
+ hfs_block_alloc_flags_t flags,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks)
+{
+ OSErr retval = noErr;
+ uint32_t currentStart = startingBlock;
+
+ uint32_t foundStart = 0; // values to emit to caller
+ uint32_t foundCount = 0;
+
+ uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this
+ uint32_t collision_count = 0;
+
+ int err;
+ int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
+ Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
+
+ int recently_deleted = 0;
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
+
+ while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
+
+ /* Try and find something that works. */
+
+ /*
+ * NOTE: If the only contiguous free extent of at least minBlocks
+ * crosses startingBlock (i.e. starts before, ends after), then we
+ * won't find it. Earlier versions *did* find this case by letting
+ * the second search look past startingBlock by minBlocks. But
+ * with the free extent cache, this can lead to duplicate entries
+ * in the cache, causing the same blocks to be allocated twice.
+ */
+ retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
+ maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);
+
+ if (retval == dskFulErr && currentStart != 0) {
+ /*
+ * We constrain the endingBlock so we don't bother looking for ranges
+ * that would overlap those found in the previous call, if the summary bitmap
+			 * is not on for this volume. If it is, then we assume that it was not
+			 * trustworthy and do a full scan.
+ */
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
+ maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
+ }
+ else {
+ retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
+ maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
+ }
+ }
+
+ if (retval != noErr) {
+ goto bailout;
+ }
+
+ /* Do we overlap with the recently found collision extent? */
+ if (collision_start) {
+ if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
+ /*
+ * We've looped around, and the only thing we could use was the collision extent.
+ * Since we are allowed to use it, go ahead and do so now.
+ */
+				if (allowReuse) {
+					/*
+					 * Then we couldn't find anything except values which might have been
+					 * recently deallocated. Just return our cached value if we are allowed to.
+ */
+ foundStart = collision_start;
+ foundCount = collision_count;
+ goto bailout;
+ }
+ else {
+ /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
+ retval = dskFulErr;
+ goto bailout;
+ }
+ }
+ }
+
+		/* OK, we know we must not have collided. See if this one is recently deleted */
+ if (hfsmp->jnl) {
+ recently_deleted = 0;
+ uint32_t nextStart;
+ err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
+ (uint64_t) foundCount, &recently_deleted, &nextStart);
+ if (err == 0) {
+				if (recently_deleted != 0) {
+					/*
+					 * These blocks were recently deleted/deallocated. Cache the extent,
+					 * but keep searching to see if we can find one that won't collide here.
+ */
+ if (collision_start == 0) {
+ collision_start = foundStart;
+ collision_count = foundCount;
+ }
+ recently_deleted = 0;
+
+ /*
+ * advance currentStart to the point just past the overlap we just found. Note that
+ * we will automatically loop around to start of the bitmap as needed.
+ */
+ currentStart = nextStart;
+ /* Unset foundStart/Count to allow it to loop around again. */
+ foundStart = 0;
+ foundCount = 0;
+ }
+ }
+ } // end jnl/deleted case
+
+ /*
+ * If we found something good, we'd break out of the loop at the top; foundCount
+ * and foundStart should be set.
+ */
+
+ } // end while loop.
+
+bailout:
+
+ if (retval == noErr) {
+ *actualStartBlock = foundStart;
+ *actualNumBlocks = foundCount;
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0);
+
+ return retval;
+
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine: BlockFindAny
+
+Function:	Find one or more allocation blocks; may return fewer than
+ requested. The caller guarantees that there is at least one
+ free block.
+
+Inputs:
+ vcb Pointer to volume where space is to be allocated
+ startingBlock Preferred first block for allocation
+ endingBlock Last block to check + 1
+ maxBlocks Maximum number of contiguous blocks to allocate
+ useMetaZone
+
+Outputs:
+ actualStartBlock First block of range allocated, or 0 if error
+ actualNumBlocks Number of blocks allocated, or 0 if error
+_______________________________________________________________________
+*/
+
+static OSErr BlockFindAny(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ register u_int32_t endingBlock,
+ u_int32_t maxBlocks,
+ hfs_block_alloc_flags_t flags,
+ Boolean trustSummary,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks)
+{
+
+ /*
+ * If it is enabled, scan through the summary table to find the first free block.
+ *
+ * If it reports that there are not any free blocks, we could have a false
+ * positive, so in that case, use the input arguments as a pass through.
+ */
+ uint32_t start_blk = startingBlock;
+ uint32_t end_blk = endingBlock;
+ struct hfsmount *hfsmp;
+ OSErr err;
+
+ hfsmp = (struct hfsmount*)vcb;
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ uint32_t suggested_start;
+
+ /*
+ * If the summary table is enabled, scan through it to find the first free
+ * block. If there was an error, or we couldn't find anything free in the
+ * summary table, then just leave the start_blk fields unmodified. We wouldn't
+		 * have gotten to this point unless the mount point made it look like there was possibly
+ * free space in the FS.
+ */
+ err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
+ if (err == 0) {
+ start_blk = suggested_start;
+ }
+ else {
+ /* Differentiate between ENOSPC and a more esoteric error in the above call. */
+ if ((err == ENOSPC) && (trustSummary)) {
+ /*
+				 * The 'trustSummary' argument is for doing a full scan if we
+				 * really need the space and we think it's somewhere but can't find it in the
+ * summary table. If it's true, then we trust the summary table and return
+ * dskFulErr if we couldn't find it above.
+ */
+ return dskFulErr;
+ }
+ /*
+ * If either trustSummary was false or we got a different errno, then we
+ * want to fall through to the real bitmap single i/o code...
+ */
+ }
+ }
+
+ err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
+ flags, actualStartBlock, actualNumBlocks);
+
+ return err;
+}
+
+
+/*
+ * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
+ * figure out where the free allocation blocks are. Inputs and
+ * outputs are the same as for BlockFindAny.
+ */
+
+static OSErr BlockFindAnyBitmap(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ register u_int32_t endingBlock,
+ u_int32_t maxBlocks,
+ hfs_block_alloc_flags_t flags,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks)
+{
+ OSErr err;
+ register u_int32_t block = 0; // current block number
+	register u_int32_t currentWord;	//	Current word within bitmap block
+ register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
+ register u_int32_t wordsLeft; // Number of words left in this bitmap block
+ u_int32_t *buffer = NULL;
+ u_int32_t *currCache = NULL;
+ uintptr_t blockRef = 0;
+ u_int32_t bitsPerBlock;
+ u_int32_t wordsPerBlock;
+ Boolean dirty = false;
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
+ Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
+
+restartSearchAny:
+
+ /*
+ * When we're skipping the metadata zone and the start/end
+ * range overlaps with the metadata zone then adjust the
+ * start to be outside of the metadata zone. If the range
+ * is entirely inside the metadata zone then we can deny the
+ * request (dskFulErr).
+ */
+ if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
+ if (startingBlock <= vcb->hfs_metazone_end) {
+ if (endingBlock > (vcb->hfs_metazone_end + 2))
+ startingBlock = vcb->hfs_metazone_end + 1;
+ else {
+ err = dskFulErr;
+ goto Exit;
+ }
+ }
+ }
+
+ // Since this routine doesn't wrap around
+ if (maxBlocks > (endingBlock - startingBlock)) {
+ maxBlocks = endingBlock - startingBlock;
+ }
+
+ //
+ // Pre-read the first bitmap block
+ //
+ err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
+ if (err != noErr) goto Exit;
+ buffer = currCache;
+
+ //
+ // Set up the current position within the block
+ //
+ {
+ u_int32_t wordIndexInBlock;
+
+ bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
+ wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+
+ wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+ buffer += wordIndexInBlock;
+ wordsLeft = wordsPerBlock - wordIndexInBlock;
+ currentWord = SWAP_BE32 (*buffer);
+ bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
+ }
+
+ /*
+ * While loop 1:
+ * Find the first unallocated block starting at 'block'
+ */
+ uint32_t summary_block_scan = 0;
+
+ block=startingBlock;
+ while (block < endingBlock) {
+ if ((currentWord & bitMask) == 0)
+ break;
+
+ // Next bit
+ ++block;
+ bitMask >>= 1;
+ if (bitMask == 0) {
+ // Next word
+ bitMask = kHighBitInWordMask;
+ ++buffer;
+
+ if (--wordsLeft == 0) {
+ // Next block
+ buffer = currCache = NULL;
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ /*
+ * If summary_block_scan is non-zero, then we must have
+ * pulled a bitmap file block into core, and scanned through
+					 * the entire thing. Because we're in this loop, we are
+					 * implicitly trusting that the summary table didn't have any knowledge
+					 * about this particular block. As a result, update the summary table
+					 * (lazily, now that we've scanned the bitmap block) with our findings
+					 * that this particular block is completely used up.
+ */
+ if (summary_block_scan != 0) {
+ uint32_t summary_bit;
+ (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
+ hfs_set_summary (hfsmp, summary_bit, 1);
+ summary_block_scan = 0;
+ }
+ }
+
+ err = ReleaseBitmapBlock(vcb, blockRef, false);
+ if (err != noErr) goto Exit;
+
+ /*
+ * Skip over metadata blocks.
+ */
+ if (!useMetaZone) {
+ block = NextBitmapBlock(vcb, block);
+ }
+ if (block >= endingBlock) {
+ err = dskFulErr;
+ goto Exit;
+ }
+
+ err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
+ if (err != noErr) goto Exit;
+ buffer = currCache;
+ summary_block_scan = block;
+ wordsLeft = wordsPerBlock;
+ }
+ currentWord = SWAP_BE32 (*buffer);
+ }
+ }
+
+ // Did we get to the end of the bitmap before finding a free block?
+ // If so, then couldn't allocate anything.
+ if (block >= endingBlock) {
+ err = dskFulErr;
+ goto Exit;
+ }
+
+
+ /*
+ * Don't move forward just yet. Verify that either one of the following
+ * two conditions is true:
+ * 1) journaling is not enabled
+ * 2) block is not currently on any pending TRIM list.
+ */
+ if (hfsmp->jnl != NULL && (forceFlush == false)) {
+ int recently_deleted = 0;
+ uint32_t nextblk;
+ err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
+ if ((err == 0) && (recently_deleted)) {
+
+ /* release the bitmap block & unset currCache. we may jump past it. */
+ err = ReleaseBitmapBlock(vcb, blockRef, false);
+ currCache = NULL;
+ if (err != noErr) {
+ goto Exit;
+ }
+ /* set our start to nextblk, and re-do the search. */
+ startingBlock = nextblk;
+ goto restartSearchAny;
+ }
+ }
+
+
+ // Return the first block in the allocated range
+ *actualStartBlock = block;
+ dirty = true;
+
+ // If we could get the desired number of blocks before hitting endingBlock,
+ // then adjust endingBlock so we won't keep looking. Ideally, the comparison
+ // would be (block + maxBlocks) < endingBlock, but that could overflow. The
+ // comparison below yields identical results, but without overflow.
+ if (block < (endingBlock-maxBlocks)) {
+ endingBlock = block + maxBlocks; // if we get this far, we've found enough
+ }
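+	/*
+	 * Illustration of the overflow the comment above avoids
+	 * (hypothetical values): with block == 0xFFFFFF00 and maxBlocks ==
+	 * 0x200, (block + maxBlocks) wraps to 0x100 in 32 bits, so the
+	 * naive test would clamp endingBlock incorrectly. The subtraction
+	 * endingBlock - maxBlocks cannot underflow because maxBlocks was
+	 * already clipped to endingBlock - startingBlock.
+	 */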
+
+ /*
+ * While loop 2:
+ * Scan the bitmap, starting at 'currentWord' in the current
+ * bitmap block. Continue iterating through the bitmap until
+	 * either we hit an allocated block, or until we have accumulated
+ * maxBlocks worth of bitmap.
+ */
+
+ /* Continue until we see an allocated block */
+ while ((currentWord & bitMask) == 0) {
+ // Move to the next block. If no more, then exit.
+ ++block;
+ if (block == endingBlock) {
+ break;
+ }
+
+ // Next bit
+ bitMask >>= 1;
+ if (bitMask == 0) {
+ // Next word
+ bitMask = kHighBitInWordMask;
+ ++buffer;
+
+ if (--wordsLeft == 0) {
+ // Next block
+ buffer = currCache = NULL;
+
+ /* We're only reading the bitmap here, so mark it as clean */
+ err = ReleaseBitmapBlock(vcb, blockRef, false);
+ if (err != noErr) {
+ goto Exit;
+ }
+
+ /*
+ * Skip over metadata blocks.
+ */
+ if (!useMetaZone) {
+ u_int32_t nextBlock;
+ nextBlock = NextBitmapBlock(vcb, block);
+ if (nextBlock != block) {
+ goto Exit; /* allocation gap, so stop */
+ }
+ }
+
+ if (block >= endingBlock) {
+ goto Exit;
+ }
+
+ err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
+ if (err != noErr) {
+ goto Exit;
+ }
+ buffer = currCache;
+ wordsLeft = wordsPerBlock;
+ }
+ currentWord = SWAP_BE32 (*buffer);
+ }
+ }
+
+Exit:
+ if (currCache) {
+ /* Release the bitmap reference prior to marking bits in-use */
+ (void) ReleaseBitmapBlock(vcb, blockRef, false);
+ currCache = NULL;
+ }
+
+ if (err == noErr) {
+ *actualNumBlocks = block - *actualStartBlock;
+
+ // sanity check
+ if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
+ panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
+ }
+ }
+ else {
+ *actualStartBlock = 0;
+ *actualNumBlocks = 0;
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+ return err;
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine: BlockFindKnown
+
+Function: Return a potential extent from the free extent cache. The
+ returned extent *must* be marked allocated and removed
+ from the cache by the *caller*.
+
+Inputs:
+ vcb Pointer to volume where space is to be allocated
+ maxBlocks Maximum number of contiguous blocks to allocate
+
+Outputs:
+ actualStartBlock First block of range allocated, or 0 if error
+ actualNumBlocks Number of blocks allocated, or 0 if error
+
+Returns:
+ dskFulErr Free extent cache is empty
+_______________________________________________________________________
+*/
+
+static OSErr BlockFindKnown(
+ ExtendedVCB *vcb,
+ u_int32_t maxBlocks,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks)
+{
+ OSErr err;
+ u_int32_t foundBlocks;
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
+
+ hfs_lock_mount (hfsmp);
+ lck_spin_lock(&vcb->vcbFreeExtLock);
+ if ( vcb->vcbFreeExtCnt == 0 ||
+ vcb->vcbFreeExt[0].blockCount == 0) {
+ lck_spin_unlock(&vcb->vcbFreeExtLock);
+ hfs_unlock_mount(hfsmp);
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, 0, 0, 0, 0);
+ return dskFulErr;
+ }
+ lck_spin_unlock(&vcb->vcbFreeExtLock);
+ hfs_unlock_mount(hfsmp);
+
+ lck_spin_lock(&vcb->vcbFreeExtLock);
+
+	// Just grab up to maxBlocks of the first (largest) free extent.
+ *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
+ foundBlocks = vcb->vcbFreeExt[0].blockCount;
+ if (foundBlocks > maxBlocks)
+ foundBlocks = maxBlocks;
+ *actualNumBlocks = foundBlocks;
+
+ lck_spin_unlock(&vcb->vcbFreeExtLock);
+
+ // sanity check
+ if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
+ {
+		printf ("hfs: BlockFindKnown() found allocation overflow on \"%s\"\n", vcb->vcbVN);
+ hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
+ err = EIO;
+ } else
+ err = 0;
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+ return err;
+}
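+
+/*
+ * Illustrative only (not compiled): a minimal sketch of the contract described
+ * above. After BlockFindKnown() returns an extent, the caller is responsible
+ * for marking it allocated and pruning it from the free extent cache;
+ * remove_free_extent_cache() below stands in for whatever helper the caller
+ * uses to drop the extent from vcbFreeExt.
+ */
+#if 0
+static OSErr example_alloc_from_cache(ExtendedVCB *vcb, u_int32_t maxBlocks)
+{
+	u_int32_t start, count;
+	OSErr err = BlockFindKnown(vcb, maxBlocks, &start, &count);
+	if (err == noErr) {
+		/* Mark the extent in-use on disk (in the bitmap)... */
+		err = BlockMarkAllocatedInternal(vcb, start, count, 0);
+		/* ...then remove it from the free extent cache (caller's duty). */
+		if (err == noErr)
+			remove_free_extent_cache(VCBTOHFS(vcb), start, count);
+	}
+	return err;
+}
+#endif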
+
+/*
+ * BlockMarkAllocated
+ *
+ * This is a wrapper function around the internal calls which will actually mark the blocks
+ * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
+ * this logic here to avoid callers having to deal with whether or not the red-black tree
+ * is enabled.
+ */
+
+OSErr BlockMarkAllocated(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ register u_int32_t numBlocks)
+{
+ struct hfsmount *hfsmp;
+
+ hfsmp = VCBTOHFS(vcb);
+
+ return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
+
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine: BlockMarkAllocatedInternal
+
+Function: Mark a contiguous group of blocks as allocated (set in the
+ bitmap). It assumes those bits are currently marked
+ deallocated (clear in the bitmap). Note that this function
+			must be called regardless of whether the bitmap or the
+ tree-based allocator is used, as all allocations must correctly
+ be marked on-disk. If the tree-based approach is running, then
+ this will be done before the node is removed from the tree.
+
+Inputs:
+ vcb Pointer to volume where space is to be allocated
+ startingBlock First block number to mark as allocated
+ numBlocks Number of blocks to mark as allocated
+_______________________________________________________________________
+*/
+static
+OSErr BlockMarkAllocatedInternal (
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t numBlocks,
+ hfs_block_alloc_flags_t flags)
+{
+ OSErr err;
+ register u_int32_t *currentWord; // Pointer to current word within bitmap block
+ register u_int32_t wordsLeft; // Number of words left in this bitmap block
+ register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
+ u_int32_t firstBit; // Bit index within word of first bit to allocate
+ u_int32_t numBits; // Number of bits in word to allocate
+ u_int32_t *buffer = NULL;
+ uintptr_t blockRef = 0;
+ u_int32_t bitsPerBlock;
+ u_int32_t wordsPerBlock;
+ // XXXdbg
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0);
+
+#if DEBUG
+
+ struct rl_entry *range;
+ TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
+ assert(rl_overlap(range, startingBlock,
+ startingBlock + numBlocks - 1) == RL_NOOVERLAP);
+ }
+
+#endif
+
+ int force_flush = 0;
+ /*
+ * Since we are about to mark these bits as in-use
+ * in the bitmap, decide if we need to alert the caller
+ * that a journal flush might be appropriate. It's safe to
+ * poke at the journal pointer here since we MUST have
+ * called start_transaction by the time this function is invoked.
+ * If the journal is enabled, then it will have taken the requisite
+ * journal locks. If it is not enabled, then we have taken
+ * a shared lock on the global lock.
+ */
+ if (hfsmp->jnl) {
+ uint32_t ignore;
+ err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
+ if ((err == 0) && (force_flush)) {
+ journal_request_immediate_flush (hfsmp->jnl);
+ }
+ }
+
+ hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
+
+ /*
+ * Don't make changes to the disk if we're just reserving. Note that
+ * we could do better in the tentative case because we could, in theory,
+ * avoid the journal flush above. However, that would mean that we would
+	 * need to catch the callback to stop it incorrectly adding the extent
+ * to our free cache.
+ */
+ if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
+ err = 0;
+ goto Exit;
+ }
+
+ //
+ // Pre-read the bitmap block containing the first word of allocation
+ //
+
+ err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_RESERVED);
+ if (err != noErr) goto Exit;
+ //
+ // Initialize currentWord, and wordsLeft.
+ //
+ {
+ u_int32_t wordIndexInBlock;
+
+ bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
+ wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+
+ wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+ currentWord = buffer + wordIndexInBlock;
+ wordsLeft = wordsPerBlock - wordIndexInBlock;
+ }
+
+ // XXXdbg
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ }
+
+ //
+ // If the first block to allocate doesn't start on a word
+ // boundary in the bitmap, then treat that first word
+ // specially.
+ //
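+	// Worked example (illustrative): with firstBit = 3 and numBlocks = 4,
+	// bitMask = (kAllBitsSetInWord >> 3) & ~(kAllBitsSetInWord >> 7) = 0x1E000000:
+	// exactly four bits set, starting three bits below the most significant bit.
+	//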
+
+ firstBit = startingBlock % kBitsPerWord;
+ if (firstBit != 0) {
+ bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
+ numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
+ if (numBits > numBlocks) {
+ numBits = numBlocks; // entire allocation is inside this one word
+ bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
+ }
+#if DEBUG
+ if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+ panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
+ }
+#endif
+ *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
+ numBlocks -= numBits; // adjust number of blocks left to allocate
+
+ ++currentWord; // move to next word
+ --wordsLeft; // one less word left in this block
+ }
+
+ //
+ // Allocate whole words (32 blocks) at a time.
+ //
+
+ bitMask = kAllBitsSetInWord; // put this in a register for 68K
+ while (numBlocks >= kBitsPerWord) {
+ if (wordsLeft == 0) {
+ // Read in the next bitmap block
+ startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
+
+ buffer = NULL;
+ err = ReleaseBitmapBlock(vcb, blockRef, true);
+ if (err != noErr) goto Exit;
+
+ err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_RESERVED);
+ if (err != noErr) goto Exit;
+
+ // XXXdbg
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ }
+
+ // Readjust currentWord and wordsLeft
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+#if DEBUG
+ if (*currentWord != 0) {
+ panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
+ }
+#endif
+ *currentWord = SWAP_BE32 (bitMask);
+ numBlocks -= kBitsPerWord;
+
+ ++currentWord; // move to next word
+ --wordsLeft; // one less word left in this block
+ }
+
+ //
+ // Allocate any remaining blocks.
+ //
+
+ if (numBlocks != 0) {
+ bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
+ if (wordsLeft == 0) {
+ // Read in the next bitmap block
+ startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
+
+ buffer = NULL;
+ err = ReleaseBitmapBlock(vcb, blockRef, true);
+ if (err != noErr) goto Exit;
+
+ err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_RESERVED);
+ if (err != noErr) goto Exit;
+
+ // XXXdbg
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ }
+
+ // Readjust currentWord and wordsLeft
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+#if DEBUG
+ if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+ panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
+ }
+#endif
+ *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
+
+ // No need to update currentWord or wordsLeft
+ }
+
+Exit:
+
+ if (buffer)
+ (void)ReleaseBitmapBlock(vcb, blockRef, true);
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+ return err;
+}
+
+
+/*
+ * BlockMarkFree
+ *
+ * This is a wrapper function around the internal calls which will actually mark the blocks
+ * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
+ * this logic here to avoid callers having to deal with whether or not the red-black tree
+ * is enabled.
+ *
+ */
+OSErr BlockMarkFree(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ register u_int32_t numBlocks)
+{
+ struct hfsmount *hfsmp;
+ hfsmp = VCBTOHFS(vcb);
+
+ return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
+}
+
+
+/*
+ * BlockMarkFreeUnused
+ *
+ * Scan the bitmap block beyond end of current file system for bits
+ * that are marked as used. If any of the bits are marked as used,
+ * this function marks them free.
+ *
+ * Note: This was specifically written to mark all bits beyond
+ * end of current file system during hfs_extendfs(), which makes
+ * sure that all the new blocks added to the file system are
+ * marked as free. We expect that all the blocks beyond end of
+ * current file system are always marked as free, but there might
+ * be cases where some are marked as used. This function assumes that
+ * the number of blocks incorrectly marked as used is relatively
+ * small; otherwise this can overflow the journal transaction size
+ * on certain file system configurations (for example, a large unused
+ * bitmap with a relatively small journal).
+ *
+ * Input:
+ * startingBlock: First block of the range to mark unused
+ * numBlocks: Number of blocks in the range to mark unused
+ *
+ * Returns: zero on success, non-zero on error.
+ */
+OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
+{
+ int error = 0;
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ u_int32_t curNumBlocks;
+ u_int32_t bitsPerBlock;
+ u_int32_t lastBit;
+
+ /* Use the optimal bitmap I/O size instead of bitmap block size */
+ bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
+
+ /*
+	 * First clear any bits that are not aligned on a bitmap block boundary.
+ *
+ * Calculate the first bit in the bitmap block next to
+ * the bitmap block containing the bit for startingBlock.
+ * Using this value, we calculate the total number of
+ * bits to be marked unused from startingBlock to the
+ * end of bitmap block containing startingBlock.
+ */
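+	/*
+	 * Worked example (illustrative): with vcbVBMIOSize = 4096, bitsPerBlock
+	 * is 32768. For startingBlock = 40000, lastBit rounds up to 65536, so
+	 * this first pass covers curNumBlocks = 25536 blocks, reaching the
+	 * bitmap block boundary.
+	 */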
+ lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
+ curNumBlocks = lastBit - startingBlock;
+ if (curNumBlocks > numBlocks) {
+ curNumBlocks = numBlocks;
+ }
+ error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
+ if (error) {
+ return error;
+ }
+ startingBlock += curNumBlocks;
+ numBlocks -= curNumBlocks;
+
+ /*
+ * Check a full bitmap block for any 'used' bit. If any bit is used,
+ * mark all the bits only in that bitmap block as free. This ensures
+ * that we do not write unmodified bitmap blocks and do not
+ * overwhelm the journal.
+ *
+	 * The code checks one full bitmap block at a time, and frees the
+	 * entire bitmap block only if any bit in that bitmap block is
+	 * marked as used. Finally, it handles the last bitmap block, which
+	 * might be partially full, by checking only up to the
+	 * caller-specified last bit and, if any bit is set, freeing only
+	 * that range.
+ */
+ while (numBlocks) {
+ if (numBlocks >= bitsPerBlock) {
+ curNumBlocks = bitsPerBlock;
+ } else {
+ curNumBlocks = numBlocks;
+ }
+ if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
+ error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
+ if (error) {
+ return error;
+ }
+ }
+ startingBlock += curNumBlocks;
+ numBlocks -= curNumBlocks;
+ }
+
+ return error;
+}
+
+/*
+_______________________________________________________________________
+
+Routine: BlockMarkFreeInternal
+
+Function: Mark a contiguous group of blocks as free (clear in the
+ bitmap). It assumes those bits are currently marked
+ allocated (set in the bitmap).
+
+Inputs:
+ vcb Pointer to volume where space is to be freed
+ startingBlock First block number to mark as freed
+ numBlocks Number of blocks to mark as freed
+	do_validate	If true, validate that the blocks being
+				deallocated are within totalBlocks for the
+				current volume and that they were allocated
+				before they are marked free.
+_______________________________________________________________________
+*/
+static
+OSErr BlockMarkFreeInternal(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock_in,
+ register u_int32_t numBlocks_in,
+ Boolean do_validate)
+{
+ OSErr err;
+ u_int32_t startingBlock = startingBlock_in;
+ u_int32_t numBlocks = numBlocks_in;
+ uint32_t unmapStart = startingBlock_in;
+ uint32_t unmapCount = numBlocks_in;
+ uint32_t wordIndexInBlock;
+ u_int32_t *currentWord; // Pointer to current word within bitmap block
+ u_int32_t wordsLeft; // Number of words left in this bitmap block
+ u_int32_t bitMask; // Word with given bits already set (ready to OR in)
+ u_int32_t currentBit; // Bit index within word of current bit to allocate
+ u_int32_t numBits; // Number of bits in word to allocate
+ u_int32_t *buffer = NULL;
+ uintptr_t blockRef = 0;
+ u_int32_t bitsPerBlock;
+ u_int32_t wordsPerBlock;
+ // XXXdbg
+ struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
+
+ /*
+ * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
+ * need to be able to free blocks being relocated during hfs_truncatefs.
+ */
+ if ((do_validate == true) &&
+ (startingBlock + numBlocks > vcb->totalBlocks)) {
+#if ALLOC_DEBUG || DEBUG
+ panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
+ __builtin_unreachable();
+#else
+ printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
+ hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
+ err = EIO;
+ goto Exit;
+#endif
+ }
+
+ //
+ // Pre-read the bitmap block containing the first word of allocation
+ //
+
+ err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_RESERVED);
+ if (err != noErr) goto Exit;
+ // XXXdbg
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ }
+
+ uint32_t min_unmap = 0, max_unmap = UINT32_MAX;
+
+ // Work out the bounds of any unmap we can send down
+ struct rl_entry *range;
+ for (int i = 0; i < 2; ++i) {
+ TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+ if (range->rl_start < startingBlock
+ && range->rl_end >= min_unmap) {
+ min_unmap = range->rl_end + 1;
+ }
+ if (range->rl_end >= startingBlock + numBlocks
+ && range->rl_start < max_unmap) {
+ max_unmap = range->rl_start;
+ }
+ }
+ }
+
+ //
+ // Figure out how many bits and words per bitmap block.
+ //
+ bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
+ wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+ wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+
+ //
+ // Look for a range of free blocks immediately before startingBlock
+ // (up to the start of the current bitmap block). Set unmapStart to
+ // the first free block.
+ //
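+	// Note that unmapStart/unmapCount only widen the extent later handed to
+	// hfs_unmap_free_extent() (the TRIM hint); the bits actually cleared are
+	// still exactly [startingBlock, startingBlock + numBlocks).
+	//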
+ currentWord = buffer + wordIndexInBlock;
+ currentBit = startingBlock % kBitsPerWord;
+ bitMask = kHighBitInWordMask >> currentBit;
+ while (unmapStart > min_unmap) {
+ // Move currentWord/bitMask back by one bit
+ bitMask <<= 1;
+ if (bitMask == 0) {
+ if (--currentWord < buffer)
+ break;
+ bitMask = kLowBitInWordMask;
+ }
+
+ if (*currentWord & SWAP_BE32(bitMask))
+ break; // Found an allocated block. Stop searching.
+ --unmapStart;
+ ++unmapCount;
+ }
+
+ //
+ // If the first block to free doesn't start on a word
+ // boundary in the bitmap, then treat that first word
+ // specially.
+ //
+
+ currentWord = buffer + wordIndexInBlock;
+ wordsLeft = wordsPerBlock - wordIndexInBlock;
+ currentBit = startingBlock % kBitsPerWord;
+ if (currentBit != 0) {
+ bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit
+ numBits = kBitsPerWord - currentBit; // number of remaining bits in this word
+ if (numBits > numBlocks) {
+ numBits = numBlocks; // entire allocation is inside this one word
+ bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last
+ }
+ if ((do_validate == true) &&
+ (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
+ goto Corruption;
+ }
+ *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
+ numBlocks -= numBits; // adjust number of blocks left to free
+
+ ++currentWord; // move to next word
+ --wordsLeft; // one less word left in this block
+ }
+
+ //
+ // Free whole words (32 blocks) at a time.
+ //
+
+ while (numBlocks >= kBitsPerWord) {
+ if (wordsLeft == 0) {
// Read in the next bitmap block
startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
-
+
+ buffer = NULL;
+ err = ReleaseBitmapBlock(vcb, blockRef, true);
+ if (err != noErr) goto Exit;
+
+ err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_RESERVED);
+ if (err != noErr) goto Exit;
+
+ // XXXdbg
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ }
+
+ // Readjust currentWord and wordsLeft
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+ if ((do_validate == true) &&
+ (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
+ goto Corruption;
+ }
+ *currentWord = 0; // clear the entire word
+ numBlocks -= kBitsPerWord;
+
+ ++currentWord; // move to next word
+ --wordsLeft; // one less word left in this block
+ }
+
+ //
+ // Free any remaining blocks.
+ //
+
+ if (numBlocks != 0) {
+ bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
+ if (wordsLeft == 0) {
+ // Read in the next bitmap block
+ startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
+
+ buffer = NULL;
+ err = ReleaseBitmapBlock(vcb, blockRef, true);
+ if (err != noErr) goto Exit;
+
+ err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_RESERVED);
+ if (err != noErr) goto Exit;
+
+ // XXXdbg
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ }
+
+ // Readjust currentWord and wordsLeft
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+ if ((do_validate == true) &&
+ (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
+ goto Corruption;
+ }
+ *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
+
+ // No need to update currentWord or wordsLeft
+ }
+
+ //
+ // Look for a range of free blocks immediately after the range we just freed
+ // (up to the end of the current bitmap block).
+ //
+ wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
+ wordsLeft = wordsPerBlock - wordIndexInBlock;
+ currentWord = buffer + wordIndexInBlock;
+ currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
+ bitMask = kHighBitInWordMask >> currentBit;
+ while (unmapStart + unmapCount < max_unmap) {
+ // Move currentWord/bitMask/wordsLeft forward one bit
+ bitMask >>= 1;
+ if (bitMask == 0) {
+ if (--wordsLeft == 0)
+ break;
+ ++currentWord;
+ bitMask = kHighBitInWordMask;
+ }
+
+ if (*currentWord & SWAP_BE32(bitMask))
+ break; // Found an allocated block. Stop searching.
+ ++unmapCount;
+ }
+
+Exit:
+
+ if (buffer)
+ (void)ReleaseBitmapBlock(vcb, blockRef, true);
+
+ if (err == noErr) {
+ hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+ return err;
+
+Corruption:
+#if DEBUG
+ panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
+ __builtin_unreachable();
+#else
+ printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
+ vcb->vcbVN, startingBlock_in, numBlocks_in);
+ hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
+ err = EIO;
+ goto Exit;
+#endif
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine: BlockFindContiguous
+
+Function: Find a contiguous range of blocks that are free (bits
+ clear in the bitmap). If a contiguous range of the
+ minimum size can't be found, an error will be returned.
+ This is only needed to support the bitmap-scanning logic,
+ as the red-black tree should be able to do this by internally
+ searching its tree.
+
+Inputs:
+ vcb Pointer to volume where space is to be allocated
+ startingBlock Preferred first block of range
+ endingBlock Last possible block in range + 1
+ minBlocks Minimum number of blocks needed. Must be > 0.
+ maxBlocks Maximum (ideal) number of blocks desired
+	useMetaZone		OK to dip into metadata allocation zone
+	trustSummary	OK to use the summary table to skip over
+					fully-allocated regions of the bitmap
+	flags			Allocation flags (e.g. HFS_ALLOC_TRY_HARD,
+					HFS_ALLOC_IGNORE_TENTATIVE)
+
+Outputs:
+ actualStartBlock First block of range found, or 0 if error
+ actualNumBlocks Number of blocks found, or 0 if error
+
+Returns:
+ noErr Found at least minBlocks contiguous
+ dskFulErr No contiguous space found, or all less than minBlocks
+_______________________________________________________________________
+*/
+
+static OSErr BlockFindContiguous(
+ ExtendedVCB *vcb,
+ u_int32_t startingBlock,
+ u_int32_t endingBlock,
+ u_int32_t minBlocks,
+ u_int32_t maxBlocks,
+ Boolean useMetaZone,
+ Boolean trustSummary,
+ u_int32_t *actualStartBlock,
+ u_int32_t *actualNumBlocks,
+ hfs_block_alloc_flags_t flags)
+{
+ OSErr err;
+ register u_int32_t currentBlock; // Block we're currently looking at.
+ u_int32_t firstBlock; // First free block in current extent.
+ u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
+ u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
+ u_int32_t *buffer = NULL;
+ register u_int32_t *currentWord;
+ register u_int32_t bitMask;
+ register u_int32_t wordsLeft;
+ register u_int32_t tempWord;
+ uintptr_t blockRef = 0;
+ u_int32_t wordsPerBlock;
+ u_int32_t updated_free_extent = 0;
+ struct hfsmount *hfsmp = (struct hfsmount*) vcb;
+ HFSPlusExtentDescriptor best = { 0, 0 };
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
+
+ /*
+ * When we're skipping the metadata zone and the start/end
+ * range overlaps with the metadata zone then adjust the
+ * start to be outside of the metadata zone. If the range
+ * is entirely inside the metadata zone then we can deny the
+ * request (dskFulErr).
+ */
+ if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
+ if (startingBlock <= vcb->hfs_metazone_end) {
+ if (endingBlock > (vcb->hfs_metazone_end + 2))
+ startingBlock = vcb->hfs_metazone_end + 1;
+ else
+ goto DiskFull;
+ }
+ }
+
+ if ((endingBlock - startingBlock) < minBlocks)
+ {
+ // The set of blocks we're checking is smaller than the minimum number
+ // of blocks, so we couldn't possibly find a good range.
+ goto DiskFull;
+ }
+
+ stopBlock = endingBlock - minBlocks + 1;
+ currentBlock = startingBlock;
+ firstBlock = 0;
+
+ /*
+ * Skip over metadata blocks.
+ */
+ if (!useMetaZone)
+ currentBlock = NextBitmapBlock(vcb, currentBlock);
+
+ /*
+ * Use the summary table if we can. Skip over any totally
+ * allocated blocks. currentBlock should now point to the first
+ * block beyond the metadata zone if the metazone allocations are not
+ * allowed in this invocation.
+ */
+ if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
+ uint32_t suggestion;
+ err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
+ if (err && err != ENOSPC)
+ goto ErrorExit;
+ if (err == ENOSPC || suggestion >= stopBlock)
+ goto DiskFull;
+ currentBlock = suggestion;
+ }
+
+
+ //
+ // Pre-read the first bitmap block.
+ //
+ err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
+ if ( err != noErr ) goto ErrorExit;
+
+ //
+ // Figure out where currentBlock is within the buffer.
+ //
+ wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+
+ wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
+ currentWord = buffer + wordsLeft;
+ wordsLeft = wordsPerBlock - wordsLeft;
+
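+	/*
+	 * Blocks genuinely available right now: free blocks minus locked
+	 * reservations and, unless HFS_ALLOC_IGNORE_TENTATIVE is set,
+	 * minus tentative reservations as well.
+	 */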
+ uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
+ - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
+ ? 0 : hfsmp->tentativeBlocks));
+
+ /*
+ * This outer do-while loop is the main body of this function. Its job is
+ * to search through the blocks (until we hit 'stopBlock'), and iterate
+ * through swaths of allocated bitmap until it finds free regions.
+ */
+
+ do
+ {
+ foundBlocks = 0;
+ /*
+ * We will try and update the summary table as we search
+ * below. Note that we will never update the summary table
+ * for the first and last blocks that the summary table
+ * covers. Ideally, we should, but the benefits probably
+ * aren't that significant so we leave things alone for now.
+ */
+ uint32_t summary_block_scan = 0;
+ /*
+ * Inner while loop 1:
+ * Look for free blocks, skipping over allocated ones.
+ *
+ * Initialization starts with checking the initial partial word
+ * if applicable.
+ */
+ bitMask = currentBlock & kBitsWithinWordMask;
+ if (bitMask)
+ {
+ tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
+ bitMask = kHighBitInWordMask >> bitMask;
+ while (tempWord & bitMask)
+ {
+ bitMask >>= 1;
+ ++currentBlock;
+ }
+
+ // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
+ if (bitMask)
+ goto FoundUnused;
+
+ // Didn't find any unused bits, so we're done with this word.
+ ++currentWord;
+ --wordsLeft;
+ }
+
+ //
+ // Check whole words
+ //
+ while (currentBlock < stopBlock)
+ {
+ // See if it's time to read another block.
+ if (wordsLeft == 0)
+ {
+ buffer = NULL;
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ /*
+ * If summary_block_scan is non-zero, then we must have
+ * pulled a bitmap file block into core, and scanned through
+ * the entire thing. Because we're in this loop, we are
+					 * implicitly trusting that the summary table didn't have any
+					 * knowledge about this particular block. As a result, update
+					 * the summary table (lazily, now that we've scanned it) with
+					 * our finding that this particular block is completely used up.
+ */
+ if (summary_block_scan != 0) {
+ uint32_t summary_bit;
+ (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
+ hfs_set_summary (hfsmp, summary_bit, 1);
+ summary_block_scan = 0;
+ }
+ }
+ err = ReleaseBitmapBlock(vcb, blockRef, false);
+ if (err != noErr) goto ErrorExit;
+
+ /*
+ * Skip over metadata blocks.
+ */
+ if (!useMetaZone) {
+ currentBlock = NextBitmapBlock(vcb, currentBlock);
+ if (currentBlock >= stopBlock) {
+ goto LoopExit;
+ }
+ }
+
+ /* Skip over fully allocated bitmap blocks if we can */
+ if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
+ uint32_t suggestion;
+ err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
+ if (err && err != ENOSPC)
+ goto ErrorExit;
+ if (err == ENOSPC || suggestion >= stopBlock)
+ goto LoopExit;
+ currentBlock = suggestion;
+ }
+
+ err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
+ if ( err != noErr ) goto ErrorExit;
+
+ /*
+ * Set summary_block_scan to be the block we just read into the block cache.
+ *
+ * At this point, we've just read an allocation block worth of bitmap file
+ * into the buffer above, but we don't know if it is completely allocated or not.
+ * If we find that it is completely allocated/full then we will jump
+ * through this loop again and set the appropriate summary bit as fully allocated.
+ */
+ summary_block_scan = currentBlock;
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+
+ // See if any of the bits are clear
+ if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
+ {
+ // Figure out which bit is clear
+ bitMask = kHighBitInWordMask;
+ while (tempWord & bitMask)
+ {
+ bitMask >>= 1;
+ ++currentBlock;
+ }
+
+ break; // Found the free bit; break out to FoundUnused.
+ }
+
+ // Keep looking at the next word
+ currentBlock += kBitsPerWord;
+ ++currentWord;
+ --wordsLeft;
+ }
+
+FoundUnused:
+ // Make sure the unused bit is early enough to use
+ if (currentBlock >= stopBlock)
+ {
+ break;
+ }
+
+ // Remember the start of the extent
+ firstBlock = currentBlock;
+
+
+ /*
+ * Inner while loop 2:
+ * We get here if we find a free block. Count the number
+ * of contiguous free blocks observed.
+ *
+ * Initialization starts with checking the initial partial word
+ * if applicable.
+ */
+ bitMask = currentBlock & kBitsWithinWordMask;
+ if (bitMask)
+ {
+ tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
+ bitMask = kHighBitInWordMask >> bitMask;
+ while (bitMask && !(tempWord & bitMask))
+ {
+ bitMask >>= 1;
+ ++currentBlock;
+ }
+
+ // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
+ if (bitMask)
+ goto FoundUsed;
+
+ // Didn't find any used bits, so we're done with this word.
+ ++currentWord;
+ --wordsLeft;
+ }
+
+ //
+ // Check whole words
+ //
+ while (currentBlock < endingBlock)
+ {
+ // See if it's time to read another block.
+ if (wordsLeft == 0)
+ {
+ buffer = NULL;
+ err = ReleaseBitmapBlock(vcb, blockRef, false);
+ if (err != noErr) goto ErrorExit;
+
+ /*
+ * Skip over metadata blocks.
+ */
+ if (!useMetaZone) {
+ u_int32_t nextBlock;
+
+ nextBlock = NextBitmapBlock(vcb, currentBlock);
+ if (nextBlock != currentBlock) {
+ goto LoopExit; /* allocation gap, so stop */
+ }
+ }
+
+ err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
+ if ( err != noErr ) goto ErrorExit;
+
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+
+ // See if any of the bits are set
+ if ((tempWord = SWAP_BE32(*currentWord)) != 0)
+ {
+ // Figure out which bit is set
+ bitMask = kHighBitInWordMask;
+ while (!(tempWord & bitMask))
+ {
+ bitMask >>= 1;
+ ++currentBlock;
+ }
+
+ break; // Found the used bit; break out to FoundUsed.
+ }
+
+ // Keep looking at the next word
+ currentBlock += kBitsPerWord;
+ ++currentWord;
+ --wordsLeft;
+
+ // If we found at least maxBlocks, we can quit early.
+ if ((currentBlock - firstBlock) >= maxBlocks)
+ break;
+ }
+
+FoundUsed:
+ // Make sure we didn't run out of bitmap looking for a used block.
+ // If so, pin to the end of the bitmap.
+ if (currentBlock > endingBlock)
+ currentBlock = endingBlock;
+
+ // Figure out how many contiguous free blocks there were.
+ // Pin the answer to maxBlocks.
+ foundBlocks = currentBlock - firstBlock;
+ if (foundBlocks > maxBlocks)
+ foundBlocks = maxBlocks;
+
+ if (remaining) {
+ if (foundBlocks > remaining) {
+#if DEBUG || DEVELOPMENT
+ printf("hfs: found more blocks than are indicated free!\n");
+#endif
+ remaining = UINT32_MAX;
+ } else
+ remaining -= foundBlocks;
+ }
+
+ if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+ if (foundBlocks > best.blockCount) {
+ best.startBlock = firstBlock;
+ best.blockCount = foundBlocks;
+ }
+
+ if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
+ break;
+
+ /*
+ * Note that we will go ahead and add this free extent to our
+ * cache below but that's OK because we'll remove it again if we
+ * decide to use this extent.
+ */
+ } else if (foundBlocks >= minBlocks)
+ break; // Found what we needed!
+
+ /*
+ * We did not find the total blocks we were looking for, but
+ * add this free block run to our free extent cache list, if possible.
+ */
+
+ // If we're ignoring tentative ranges, we need to account for them here
+ if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
+ struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
+		struct rl_entry *range;
+ TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
+ rl_subtract(&free_extent, range);
+ if (rl_len(range) == 0)
+ break;
+ }
+ firstBlock = free_extent.rl_start;
+ foundBlocks = rl_len(&free_extent);
+ }
+
+ if (foundBlocks) {
+ if (hfsmp->jnl == NULL) {
+ /* If there is no journal, go ahead and add to the free ext cache. */
+ updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
+ }
+ else {
+ /*
+ * If journaled, only add to the free extent cache if this block is not
+ * waiting for a TRIM to complete; that implies that the transaction that freed it
+ * has not yet been committed to stable storage.
+ */
+ int recently_deleted = 0;
+ uint32_t nextblock;
+ err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
+ (uint64_t)foundBlocks, &recently_deleted, &nextblock);
+ if ((err) || (recently_deleted == 0)) {
+				/* if we hit an error, or the blocks were not recently freed, go ahead and insert it */
+ updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
+ }
+ err = 0;
+ }
+ }
+ } while (currentBlock < stopBlock);
+LoopExit:
+
+ if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+ firstBlock = best.startBlock;
+ foundBlocks = best.blockCount;
+ }
+
+ // Return the outputs.
+ if (foundBlocks < minBlocks)
+ {
+DiskFull:
+ err = dskFulErr;
+ErrorExit:
+ *actualStartBlock = 0;
+ *actualNumBlocks = 0;
+ }
+ else
+ {
+ err = noErr;
+ *actualStartBlock = firstBlock;
+ *actualNumBlocks = foundBlocks;
+ /*
+ * Sanity check for overflow
+ */
+ if ((firstBlock + foundBlocks) > vcb->allocLimit) {
+ panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
+ vcb->vcbVN, startingBlock, endingBlock, currentBlock,
+ firstBlock, stopBlock, minBlocks, foundBlocks);
+ }
+ }
+
+ if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
+ int i;
+ u_int32_t min_start = vcb->totalBlocks;
+
+ // set the nextAllocation pointer to the smallest free block number
+ // we've seen so on the next mount we won't rescan unnecessarily
+ lck_spin_lock(&vcb->vcbFreeExtLock);
+ for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
+ if (vcb->vcbFreeExt[i].startBlock < min_start) {
+ min_start = vcb->vcbFreeExt[i].startBlock;
+ }
+ }
+ lck_spin_unlock(&vcb->vcbFreeExtLock);
+ if (min_start != vcb->totalBlocks) {
+ if (min_start < vcb->nextAllocation) {
+ vcb->nextAllocation = min_start;
+ }
+ if (min_start < vcb->sparseAllocation) {
+ vcb->sparseAllocation = min_start;
+ }
+ }
+ }
+
+ if (buffer)
+ (void) ReleaseBitmapBlock(vcb, blockRef, false);
+
+ if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+ return err;
+}
+
+
+/*
+ * Count number of bits set in the given 32-bit unsigned number
+ *
+ * Returns:
+ * Number of bits set
+ */
+static int num_bits_set(u_int32_t num)
+{
+ int count;
+
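+	/*
+	 * Kernighan's method: num &= (num - 1) clears the lowest set bit,
+	 * so the loop runs once per set bit rather than once per bit position.
+	 */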
+ for (count = 0; num; count++) {
+ num &= num - 1;
+ }
+
+ return count;
+}
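+
+/*
+ * Note: on compilers that provide it, __builtin_popcount() computes the same
+ * result in a single step; the loop above is kept as a portable fallback.
+ */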
+
+/*
+ * For a given range of blocks, find the total number of blocks
+ * allocated. If 'stop_on_first' is true, it stops as soon as it
+ * encounters the first allocated block. This option is useful
+ * to determine if any block is allocated or not.
+ *
+ * Inputs:
+ * startingBlock First allocation block number of the range to be scanned.
+ * numBlocks Total number of blocks that need to be scanned.
+ * stop_on_first Stop the search after the first allocated block is found.
+ *
+ * Output:
+ * allocCount Total number of allocation blocks allocated in the given range.
+ *
+ * On error, it is the number of allocated blocks found
+ * before the function got an error.
+ *
+ * If 'stop_on_first' is set,
+ * allocCount = 1 if any allocated block was found.
+ * allocCount = 0 if no allocated block was found.
+ *
+ * Returns:
+ * 0 on success, non-zero on failure.
+ */
+static int
+hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
+ u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
+{
+ u_int32_t *currentWord; // Pointer to current word within bitmap block
+ u_int32_t wordsLeft; // Number of words left in this bitmap block
+ u_int32_t bitMask; // Word with given bits already set (ready to test)
+	u_int32_t firstBit; // Bit index within word of first bit to test
+	u_int32_t numBits; // Number of bits in word to test
+ u_int32_t *buffer = NULL;
+ uintptr_t blockRef;
+ u_int32_t bitsPerBlock;
+ u_int32_t wordsPerBlock;
+ u_int32_t blockCount = 0;
+ int error;
+
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
+
+ /*
+ * Pre-read the bitmap block containing the first word of allocation
+ */
+ error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_TENTATIVE);
+ if (error)
+ goto JustReturn;
+
+ /*
+ * Initialize currentWord, and wordsLeft.
+ */
+ {
+ u_int32_t wordIndexInBlock;
+
+ bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
+ wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+ wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+ currentWord = buffer + wordIndexInBlock;
+ wordsLeft = wordsPerBlock - wordIndexInBlock;
+ }
+
+ /*
+ * First test any non word aligned bits.
+ */
+ firstBit = startingBlock % kBitsPerWord;
+ if (firstBit != 0) {
+ bitMask = kAllBitsSetInWord >> firstBit;
+ numBits = kBitsPerWord - firstBit;
+ if (numBits > numBlocks) {
+ numBits = numBlocks;
+ bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+ }
+ if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+ if (stop_on_first) {
+ blockCount = 1;
+ goto Exit;
+ }
+ blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
+ }
+ numBlocks -= numBits;
+ ++currentWord;
+ --wordsLeft;
+ }
+
+ /*
+ * Test whole words (32 blocks) at a time.
+ */
+ while (numBlocks >= kBitsPerWord) {
+ if (wordsLeft == 0) {
+ /* Read in the next bitmap block. */
+ startingBlock += bitsPerBlock;
+
buffer = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, true);
- if (err != noErr) goto Exit;
+ error = ReleaseBitmapBlock(hfsmp, blockRef, false);
+ if (error) goto Exit;
+
+ error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_TENTATIVE);
+ if (error) goto Exit;
+
+ /* Readjust currentWord and wordsLeft. */
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+ if (*currentWord != 0) {
+ if (stop_on_first) {
+ blockCount = 1;
+ goto Exit;
+ }
+ blockCount += num_bits_set(*currentWord);
+ }
+ numBlocks -= kBitsPerWord;
+ ++currentWord;
+ --wordsLeft;
+ }
+
+ /*
+ * Test any remaining blocks.
+ */
+ if (numBlocks != 0) {
+ bitMask = ~(kAllBitsSetInWord >> numBlocks);
+ if (wordsLeft == 0) {
+ /* Read in the next bitmap block */
+ startingBlock += bitsPerBlock;
+
+ buffer = NULL;
+ error = ReleaseBitmapBlock(hfsmp, blockRef, false);
+ if (error) goto Exit;
+
+ error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_TENTATIVE);
+ if (error) goto Exit;
+
+ currentWord = buffer;
+ wordsLeft = wordsPerBlock;
+ }
+ if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+ if (stop_on_first) {
+ blockCount = 1;
+ goto Exit;
+ }
+ blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
+ }
+ }
+Exit:
+ if (buffer) {
+ (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+ }
+ if (allocCount) {
+ *allocCount = blockCount;
+ }
+
+JustReturn:
+ if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
+
+ return (error);
+}
+
+/*
+ * Count total number of blocks that are allocated in the given
+ * range from the bitmap. This is used to preflight total blocks
+ * that need to be relocated during volume resize.
+ *
+ * The journal or allocation file lock must be held.
+ *
+ * Returns:
+ * 0 on success, non-zero on failure.
+ * On failure, allocCount is zero.
+ */
+ int
+hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
+ u_int32_t numBlocks, u_int32_t *allocCount)
+{
+ return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
+}
+
+/*
+ * Test to see if any blocks in a range are allocated.
+ *
+ * Note: On error, this function returns 1, which means that
+ * one or more blocks in the range are allocated. This function
+ * is primarily used for volume resize and we do not want
+ * to report to the caller that the blocks are free when we
+ * were not able to deterministically find it out. So on error,
+ * were unable to determine that deterministically. So on error,
+ *
+ * The journal or allocation file lock must be held.
+ *
+ * Returns
+ * 0 if all blocks in the range are free.
+ * 1 if blocks in the range are allocated, or there was an error.
+ */
+ int
+hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+ int error;
+ u_int32_t allocCount;
+
+ error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
+ if (error) {
+ /* On error, we always say that the blocks are allocated
+ * so that volume resize does not return false success.
+ */
+ return 1;
+ } else {
+ /* The function was deterministically able to find out
+ * if there was any block allocated or not. In that case,
+ * the value in allocCount is good enough to be returned
+ * back to the caller.
+ */
+ return allocCount;
+ }
+}
+
+/*
+ * CONFIG_HFS_RBTREE
+ * Check to see if the red-black tree is live. Allocation file lock must be held
+ * shared or exclusive to call this function. Note that we may call this even if
+ * HFS is built without activating the red-black tree code.
+ */
+__private_extern__
+int
+hfs_isrbtree_active(struct hfsmount *hfsmp){
+
+#pragma unused (hfsmp)
+
+ /* Just return 0 for now */
+ return 0;
+}
+
+
+
+/* Summary Table Functions */
+/*
+ * hfs_check_summary:
+ *
+ * This function should be used to query the summary table to see if we can
+ * bypass a bitmap block or not when we're trying to find a free allocation block.
+ *
+ *
+ * Inputs:
+ * allocblock - allocation block number. Will be used to infer the correct summary bit.
+ * hfsmp -- filesystem in question.
+ *
+ * Output Arg:
+ *			*freeblocks - set to 1 if we believe there is at least one free block in this
+ *			vcbVBMIOSize page of the bitmap file.
+ *
+ *
+ * Returns:
+ * 0 on success
+ * EINVAL on error
+ *
+ */
+
+static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
+
+ int err = EINVAL;
+ if (hfsmp->vcbVBMIOSize) {
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ uint32_t index;
+ if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
+ *freeblocks = 0;
+ return EINVAL;
+ }
+
+ /* Ok, now that we have the bit index into the array, what byte is it in ? */
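+			/* e.g. (illustrative) index 19 lands in byte 2 (19 / 8), bit 3 (19 % 8) */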
+ uint32_t byteindex = index / kBitsPerByte;
+ uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
+ uint8_t bit_in_byte = index % kBitsPerByte;
+
+ if (current_byte & (1 << bit_in_byte)) {
+ /*
+ * We do not believe there is anything free in the
+ * entire vcbVBMIOSize'd block.
+ */
+ *freeblocks = 0;
+ }
+ else {
+ /* Looks like there might be a free block here... */
+ *freeblocks = 1;
+ }
+ }
+ err = 0;
+ }
+
+ return err;
+}
+
+
+#if 0
+/*
+ * hfs_get_next_summary
+ *
+ * From a given allocation block, jump to the allocation block at the start of the
+ * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over
+ * large swaths of bitmap once we have determined that the bitmap is relatively full.
+ *
+ * Inputs: hfsmount, starting allocation block number
+ * Output Arg: *newblock will contain the allocation block number to start
+ * querying.
+ *
+ * Returns:
+ * 0 on success
+ * EINVAL if the block argument is too large to be used, or the summary table is not live.
+ * EFBIG if there are no more summary bits to be queried
+ */
+static int
+hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
+
+ u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+ u_int32_t start_offset;
+ u_int32_t next_offset;
+ int err = EINVAL;
+
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
+ return err;
+ }
+
+		next_offset = start_offset + 1;
+
+ if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
+ /* Can't jump to the next summary bit. */
+ return EINVAL;
+ }
+
+ /* Otherwise, compute and return */
+ *newblock = next_offset * bits_per_iosize;
+ if (*newblock >= hfsmp->totalBlocks) {
+ return EINVAL;
+ }
+ err = 0;
+ }
+
+ return err;
+}
+
+#endif
+
+/*
+ * hfs_release_summary
+ *
+ * Given an extent that is about to be de-allocated on-disk, determine the number
+ * of summary bitmap bits that need to be marked as 'potentially available'.
+ * Then go ahead and mark them as free.
+ *
+ * Inputs:
+ * hfsmp - hfs mount
+ * block - starting allocation block.
+ * length - length of the extent.
+ *
+ * Returns:
+ *		0 on success
+ *		EINVAL upon any errors.
+ */
+static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
+ int err = EINVAL;
+ uint32_t end_blk = (start_blk + length) - 1;
+
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ /* Figure out what the starting / ending block's summary bits are */
+ uint32_t start_bit;
+ uint32_t end_bit;
+ uint32_t current_bit;
+
+ err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
+ if (err) {
+ goto release_err;
+ }
+ err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
+ if (err) {
+ goto release_err;
+ }
+
+ if (ALLOC_DEBUG) {
+ if (start_bit > end_bit) {
+ panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
+ }
+ }
+ current_bit = start_bit;
+ while (current_bit <= end_bit) {
+ err = hfs_set_summary (hfsmp, current_bit, 0);
+ current_bit++;
+ }
+ }
+
+release_err:
+ return err;
+}
+
+/*
+ * hfs_find_summary_free
+ *
+ * Given an allocation block as input, returns an allocation block number as output as a
+ * suggestion for where to start scanning the bitmap in order to find free blocks. It will
+ * determine which vcbVBMIOSize chunk contains the input allocation block, convert that into
+ * a summary bit, then keep iterating over the summary bits in order to find the first free one.
+ *
+ * Inputs:
+ * hfsmp - hfs mount
+ * block - starting allocation block
+ * newblock - output block as suggestion
+ *
+ * Returns:
+ * 0 on success
+ * ENOSPC if we could not find a free block
+ */
+
+int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
+
+ int err = ENOSPC;
+ uint32_t bit_index = 0;
+ uint32_t maybe_has_blocks = 0;
+
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ uint32_t byte_index;
+ uint8_t curbyte;
+ uint8_t bit_in_byte;
+ uint32_t summary_cap;
+
+ /*
+ * We generate a cap for the summary search because the summary table
+ * always represents a full summary of the bitmap FILE, which may
+ * be way more bits than are necessary for the actual filesystem
+ * whose allocations are mapped by the bitmap.
+ *
+		 * Compute how much of hfs_summary_size is usable for the given number
+ * of allocation blocks eligible on this FS.
+ */
+ err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
+ if (err) {
+ goto summary_exit;
+ }
+
+ /* Check the starting block first */
+ err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
+ if (err) {
+ goto summary_exit;
+ }
+
+ if (maybe_has_blocks) {
+ /*
+ * It looks like the initial start block could have something.
+ * Short-circuit and just use that.
+ */
+ *newblock = block;
+ goto summary_exit;
+ }
+
+ /*
+ * OK, now we know that the first block was useless.
+ * Get the starting summary bit, and find it in the array
+ */
+ maybe_has_blocks = 0;
+ err = hfs_get_summary_index (hfsmp, block, &bit_index);
+ if (err) {
+ goto summary_exit;
+ }
+
+ /* Iterate until we find something. */
+ while (bit_index <= summary_cap) {
+ byte_index = bit_index / kBitsPerByte;
+ curbyte = hfsmp->hfs_summary_table[byte_index];
+ bit_in_byte = bit_index % kBitsPerByte;
+
+ if (curbyte & (1 << bit_in_byte)) {
+ /* nothing here. increment and move on */
+ bit_index++;
+ }
+ else {
+ /*
+ * found something! convert bit_index back into
+ * an allocation block for use. 'newblock' will now
+ * contain the proper allocation block # based on the bit
+ * index.
+ */
+ err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
+ if (err) {
+ goto summary_exit;
+ }
+ maybe_has_blocks = 1;
+ break;
+ }
+ }
+
+ /* If our loop didn't find anything, set err to ENOSPC */
+ if (maybe_has_blocks == 0) {
+ err = ENOSPC;
+ }
+ }
+
+ /* If the summary table is not active for this mount, we'll just return ENOSPC */
+summary_exit:
+ if (maybe_has_blocks) {
+ err = 0;
+ }
+
+ return err;
+}
+
+/*
+ * hfs_get_summary_allocblock
+ *
+ * Convert a summary bit into an allocation block number to use to start searching for free blocks.
+ *
+ * Inputs:
+ * hfsmp - hfs mount
+ *		summarybit - summary bit index
+ *
+ * Output:
+ *		*alloc - allocation block number in the bitmap file.
+ *
+ * Returns:
+ *		0 on success
+ *		EINVAL on failure
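+ *
+ * Example (illustrative, assuming vcbVBMIOSize = 4096): bits_per_iosize is
+ * then 32768, so summary bit 3 maps to allocation block 98304.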
+ */
+int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc) {
+ uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+ uint32_t allocblk;
+
+ allocblk = summarybit * bits_per_iosize;
+
+ if (allocblk >= hfsmp->totalBlocks) {
+ return EINVAL;
+ }
+ else {
+ *alloc = allocblk;
+ }
+
+ return 0;
+}
+
+
+/*
+ * hfs_set_summary:
+ *
+ * This function should be used to manipulate the summary table
+ *
+ * The argument 'inuse' determines whether the bit in question is set to one
+ * (in use) or cleared to zero (free).
+ *
+ * Inputs:
+ * hfsmp - hfs mount
+ * summarybit - the bit index into the summary table to set/unset.
+ * inuse - the value to assign to the bit.
+ *
+ * Returns:
+ * 0 on success
+ * EINVAL on error
+ *
+ */
+
+static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
+
+ int err = EINVAL;
+ if (hfsmp->vcbVBMIOSize) {
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+
+ if (ALLOC_DEBUG) {
+ if (hfsmp->hfs_summary_table == NULL) {
+ panic ("hfs_set_summary: no table for %p ", hfsmp);
+ }
+ }
+
+ /* Ok, now that we have the bit index into the array, what byte is it in ? */
+ uint32_t byte_index = summarybit / kBitsPerByte;
+ uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
+ uint8_t bit_in_byte = summarybit % kBitsPerByte;
+
+ if (inuse) {
+ current_byte = (current_byte | (1 << bit_in_byte));
+ }
+ else {
+ current_byte = (current_byte & ~(1 << bit_in_byte));
+ }
+
+ hfsmp->hfs_summary_table[byte_index] = current_byte;
+ }
+ err = 0;
+ }
+
+ return err;
+}
+
+
+/*
+ * hfs_get_summary_index:
+ *
+ * This is a helper function which determines which summary bit represents the vcbVBMIOSize
+ * worth of bitmap that contains the given allocation block.
+ *
+ * Returns:
+ * 0 on success
+ * EINVAL on failure
+ */
+static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
+ uint32_t summary_bit;
+ uint32_t bits_per_iosize;
+ int err = EINVAL;
+
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ /* Is the input block bigger than the total number of blocks? */
+ if (block >= hfsmp->totalBlocks) {
+ return EINVAL;
+ }
+
+ /* Is there even a vbmIOSize set? */
+ if (hfsmp->vcbVBMIOSize == 0) {
+ return EINVAL;
+ }
+
+ bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+
+ summary_bit = block / bits_per_iosize;
+
+ *index = summary_bit;
+ err = 0;
+ }
+
+ return err;
+}
+
+/*
+ * hfs_init_summary
+ *
+ * From a given mount structure, compute how big the summary table should be for the given
+ * filesystem, then allocate and bzero the memory.
+ *
+ * Returns:
+ * 0 on success
+ *		EINVAL or ENOMEM on failure
+ */
+int
+hfs_init_summary (struct hfsmount *hfsmp) {
+
+ uint32_t summary_size;
+ uint32_t summary_size_bytes;
+ uint8_t *summary_table;
+
+ if (hfsmp->hfs_allocation_cp == NULL) {
+ if (ALLOC_DEBUG) {
+ printf("hfs: summary table cannot progress without a bitmap cnode! \n");
+ }
+ return EINVAL;
+ }
+ /*
+ * The practical maximum size of the summary table is 16KB:
+ *
+ * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
+ *
+ * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
+ * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
+ * supporting this much worse. The math would instead look like this:
+ * (512MB / 2k) / 8 == 32k.
+ *
+ * So, we will disallow the summary table if the allocation block size is < 4k.
+ */
+
+ if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
+ printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
+ return EINVAL;
+ }
+
+ summary_size = hfsmp->hfs_allocation_cp->c_blocks;
+
+ if (ALLOC_DEBUG) {
+ printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
+ hfsmp->hfs_allocation_cp->c_blocks);
+ }
+
+ /*
+	 * If the bitmap IO size is not the same as the allocation block size,
+	 * then re-compute the number of summary bits necessary. Note that above,
+	 * the default size is the number of allocation blocks in the bitmap *FILE*
+ * (not the number of bits in the bitmap itself). If the allocation block size
+ * is large enough though, we may need to increase this.
+ */
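+	/*
+	 * Worked example (illustrative): with 8k allocation blocks and a 4k
+	 * vcbVBMIOSize, a bitmap file of 100 allocation blocks spans 819200
+	 * bytes, i.e. 200 IO-sized chunks, so 200 summary bits are needed
+	 * rather than 100.
+	 */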
+ if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
+ uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
+ lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
+
+ /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
+ summary_size = (uint32_t) lrg_size;
+ }
+
+ /*
+ * If the block size is the same as the IO Size, then the total number of blocks
+ * is already equal to the number of IO units, which is our number of summary bits.
+ */
+
+ summary_size_bytes = summary_size / kBitsPerByte;
+ /* Always add one byte, just in case we have a dangling number of bits */
+ summary_size_bytes++;
+
+ if (ALLOC_DEBUG) {
+ printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
+ printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
+ }
+
+ /* Store the field in the mount point, and then MALLOC/bzero the memory */
+ hfsmp->hfs_summary_size = summary_size;
+ hfsmp->hfs_summary_bytes = summary_size_bytes;
+
+ MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK);
+ if (summary_table == NULL) {
+ return ENOMEM;
+ }
+ bzero (summary_table, summary_size_bytes);
+
+ /* enable the summary table */
+ hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
+ hfsmp->hfs_summary_table = summary_table;
+
+ if (ALLOC_DEBUG) {
+ if (hfsmp->hfs_summary_table == NULL) {
+ panic ("HFS Summary Init: no table for %p\n", hfsmp);
+ }
+ }
+ return 0;
+}
+
+/*
+ * hfs_rebuild_summary
+ *
+ * This function should be used to allocate a new hunk of memory for use as a summary
+ * table, then copy the existing data into it. We use it whenever the filesystem's size
+ * changes. When a resize is in progress, you can still use the extant summary
+ * table if it is active.
+ *
+ * Inputs:
+ *		hfsmp -- FS in question; the new size is taken from the
+ *		         allocation file's current block count.
+ *
+ * Outputs:
+ * 0 on success, EINVAL on failure. If this function fails, the summary table
+ * will be disabled for future use.
+ *
+ */
+static int hfs_rebuild_summary (struct hfsmount *hfsmp) {
+
+ uint32_t new_summary_size;
+
+ new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;
+
+
+ if (ALLOC_DEBUG) {
+ printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
+ }
+
+ /*
+ * If the bitmap IO size is not the same as the allocation block size, then re-compute
+ * the number of summary bits necessary. Note that above, the default size is the number
+ * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
+ * If the allocation block size is large enough though, we may need to increase this, as
+	 * bitmap IO is capped at 64k per IO.
+ */
+ if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
+ uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
+ lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
+
+ /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
+ new_summary_size = (uint32_t) lrg_size;
+ }
+
+ /*
+ * Ok, we have the new summary bitmap theoretical max size. See if it's the same as
+ * what we've got already...
+ */
+ if (new_summary_size != hfsmp->hfs_summary_size) {
+ uint32_t summarybytes = new_summary_size / kBitsPerByte;
+ uint32_t copysize;
+ uint8_t *newtable;
+ /* Add one byte for slop */
+ summarybytes++;
+
+ if (ALLOC_DEBUG) {
+ printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
+ printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
+ }
+
+ /* Attempt to MALLOC the memory */
+ MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK);
+ if (newtable == NULL) {
+ /*
+ * ERROR! We need to disable the table now
+ */
+ FREE (hfsmp->hfs_summary_table, M_TEMP);
+ hfsmp->hfs_summary_table = NULL;
+ hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
+ return EINVAL;
+ }
+ bzero (newtable, summarybytes);
+
+ /*
+ * The new table may be smaller than the old one. If this is true, then
+ * we can't copy the full size of the existing summary table into the new
+ * one.
+ *
+ * The converse is not an issue since we bzeroed the table above.
+ */
+ copysize = hfsmp->hfs_summary_bytes;
+ if (summarybytes < hfsmp->hfs_summary_bytes) {
+ copysize = summarybytes;
+ }
+ memcpy (newtable, hfsmp->hfs_summary_table, copysize);
+
+ /* We're all good. Destroy the old copy and update ptrs */
+ FREE (hfsmp->hfs_summary_table, M_TEMP);
+
+ hfsmp->hfs_summary_table = newtable;
+ hfsmp->hfs_summary_size = new_summary_size;
+ hfsmp->hfs_summary_bytes = summarybytes;
+ }
+
+ return 0;
+}
+
+
+#if ALLOC_DEBUG
+/*
+ * hfs_validate_summary
+ *
+ * Validation routine for the summary table. Debug-only function.
+ *
+ * Bitmap lock must be held.
+ *
+ */
+void hfs_validate_summary (struct hfsmount *hfsmp) {
+ uint32_t i;
+ int err;
+
+ /*
+	 * Iterate over all of the bits in the summary table, and verify that
+	 * there really are free blocks in the pages we believe may contain
+	 * free blocks.
+ */
+
+ if (hfsmp->hfs_summary_table == NULL) {
+ panic ("HFS Summary: No HFS summary table!");
+ }
+
+	/* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. We add 1 byte for slop. */
+ if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
+ panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
+ }
+
+ if (hfsmp->vcbVBMIOSize == 0) {
+ panic("HFS Summary: no VCB VBM IO Size !");
+ }
+
+ printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
+ printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);
+
+
+ /* iterate through all possible summary bits */
+ for (i = 0; i < hfsmp->hfs_summary_size ; i++) {
+
+ uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+ uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;
+
+ /* Compute the corresponding allocation block for the summary bit. */
+ uint32_t alloc_block = i * bits_per_iosize;
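+		/*
+		 * For example (assuming vcbVBMIOSize == 4096): bits_per_iosize is
+		 * 4096 * 8 == 32768, so summary bit i == 2 covers allocation blocks
+		 * 65536..98303 and bytes 8192..12287 of the bitmap file.
+		 */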
+
+ /*
+ * We use a uint32_t pointer here because it will speed up
+ * access to the real bitmap data on disk.
+ */
+ uint32_t *block_data;
+ struct buf *bp;
+ int counter;
+ int counter_max;
+ int saw_free_bits = 0;
+
+ /* Get the block */
+ if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) {
+ panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
+ }
+
+ /* Query the status of the bit and then make sure we match */
+ uint32_t maybe_has_free_blocks;
+ err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
+ if (err) {
+ panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
+ }
+ counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+ for (counter = 0; counter < counter_max; counter++) {
+ uint32_t word = block_data[counter];
+
+ /* We assume that we'll not find any free bits here. */
+ if (word != kAllBitsSetInWord) {
+ if (maybe_has_free_blocks) {
+ /* All done */
+ saw_free_bits = 1;
+ break;
+ }
+ else {
+ panic ("HFS Summary: hfs_check_summary saw free bits!");
+ }
+ }
+ }
+
+ if (maybe_has_free_blocks && (saw_free_bits == 0)) {
+ panic ("HFS Summary: did not see free bits !");
+ }
+
+ /* Release the block. */
+ if ((err = ReleaseScanBitmapRange (bp))) {
+ panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
+ }
+ }
+
+ printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);
+
+ return;
+}
+#endif
+
+/*
+ * hfs_alloc_scan_range:
+ *
+ * This function should be used to scan large ranges of the allocation bitmap
+ * at one time. It makes two key assumptions:
+ *
+ * 1) Bitmap lock is held during the duration of the call (exclusive)
+ * 2) There are no pages in the buffer cache for any of the bitmap
+ * blocks that we may encounter. It *MUST* be completely empty.
+ *
+ * The expected use case is when we are scanning the bitmap in full while we are
+ * still mounting the filesystem in order to issue TRIMs or build up the summary
+ * table for the mount point. It should be done after any potential journal replays
+ * are completed and their I/Os fully issued.
+ *
+ * The key reason for assumption (2) above is that this function will try to issue
+ * I/O against the bitmap file in chunks as large as possible -- essentially as
+ * much as the buffer layer will handle (1MB). Because the size of these I/Os
+ * is larger than what would be expected during normal runtime we must invalidate
+ * the buffers as soon as we are done with them so that they do not persist in
+ * the buffer cache for other threads to find, as they'll typically be doing
+ * allocation-block size I/Os instead.
+ *
+ * Input Args:
+ * hfsmp - hfs mount data structure
+ * startbit - allocation block # to start our scan. It must be aligned
+ * on a vcbVBMIOsize boundary.
+ * list - journal trim list data structure for issuing TRIMs
+ *
+ * Output Args:
+ * bitToScan - Return the next bit to scan if this function is called again.
+ * Caller will supply this into the next invocation
+ * of this call as 'startbit'.
+ */
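+
+/*
+ * Illustrative caller loop (a sketch of the expected use, not the actual
+ * mount-time scanner; 'list' setup and error handling are elided):
+ *
+ *	u_int32_t bit = 0;
+ *	while (bit < hfsmp->totalBlocks) {
+ *		if (hfs_alloc_scan_range(hfsmp, bit, &bit, list))
+ *			break;
+ *	}
+ */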
+
+static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
+ u_int32_t *bitToScan, struct jnl_trim_list *list) {
+
+ int error;
+ int readwrite = 1;
+ u_int32_t curAllocBlock;
+ struct buf *blockRef = NULL;
+ u_int32_t *buffer = NULL;
+	u_int32_t free_offset = 0;	// tracks the start of the current free range
+	u_int32_t size = 0;		// tracks the length of the current free range.
+	u_int32_t iosize = 0;		// how much I/O we should generate against the bitmap
+ u_int32_t byte_off; // byte offset into the bitmap file.
+ u_int32_t completed_size; // how much io was actually completed
+ u_int32_t last_bitmap_block;
+ u_int32_t current_word;
+ u_int32_t word_index = 0;
+
+ /* summary table building */
+ uint32_t summary_bit = 0;
+ uint32_t saw_free_blocks = 0;
+ uint32_t last_marked = 0;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ readwrite = 0;
+ }
+
+ /*
+ * Compute how much I/O we should generate here.
+	 * hfs_scan_range_size will validate that the start bit,
+ * converted into a byte offset into the bitmap file,
+ * is aligned on a VBMIOSize boundary.
+ */
+ error = hfs_scan_range_size (hfsmp, startbit, &iosize);
+ if (error) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
+ }
+ return error;
+ }
+
+ if (iosize < hfsmp->vcbVBMIOSize) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
+ }
+ return EINVAL;
+ }
+
+ /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
+ byte_off = startbit / kBitsPerByte;
+
+ /*
+ * When the journal replays blocks, it does so by writing directly to the disk
+ * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
+ * it also immediately re-reads and invalidates the range covered by the bp so
+ * it does not leave anything lingering in the cache (for iosize reasons).
+ *
+ * As such, it is safe to do large I/Os here with ReadBitmapRange.
+ *
+ * NOTE: It is not recommended, but it is possible to call the function below
+ * on sections of the bitmap that may be in core already as long as the pages are not
+ * dirty. In that case, we'd notice that something starting at that
+ * logical block of the bitmap exists in the metadata cache, and we'd check
+ * if the iosize requested is the same as what was already allocated for it.
+ * Odds are pretty good we're going to request something larger. In that case,
+ * we just free the existing memory associated with the buf and reallocate a
+ * larger range. This function should immediately invalidate it as soon as we're
+ * done scanning, so this shouldn't cause any coherency issues.
+ */
+
+ error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
+ if (error) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
+ }
+ return error;
+ }
+
+ /*
+ * At this point, we have a giant wired buffer that represents some portion of
+ * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
+ * bytes though, so clip our ending bit to what we actually read in.
+ */
+ completed_size = buf_count(blockRef);
+ last_bitmap_block = completed_size * kBitsPerByte;
+ last_bitmap_block = last_bitmap_block + startbit;
+
+ /* Cap the last block to the total number of blocks if required */
+ if (last_bitmap_block > hfsmp->totalBlocks) {
+ last_bitmap_block = hfsmp->totalBlocks;
+ }
+
+ /* curAllocBlock represents the logical block we're analyzing. */
+ curAllocBlock = startbit;
+ word_index = 0;
+ size = 0;
+
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
+ error = EINVAL;
+ if (ALLOC_DEBUG) {
+ panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
+ }
+ return error;
+ }
+ /*
+ * summary_bit should now be set to the summary bit corresponding to
+ * the allocation block of the first bit that we're supposed to scan
+ */
+ }
+ saw_free_blocks = 0;
+
+ while (curAllocBlock < last_bitmap_block) {
+ u_int32_t bit;
+
+ /* Update the summary table as needed */
+ if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+ if (ALLOC_DEBUG) {
+ if (hfsmp->hfs_summary_table == NULL) {
+ panic ("hfs_alloc_scan_range: no summary table!");
+ }
+ }
+
+ uint32_t temp_summary;
+ error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
+ if (error) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
+ }
+ return EINVAL;
+ }
+
+ if (ALLOC_DEBUG) {
+ if (temp_summary < summary_bit) {
+ panic ("hfs_alloc_scan_range: backwards summary bit?\n");
+ }
+ }
+
+ /*
+ * If temp_summary is greater than summary_bit, then this
+ * means that the next allocation block crosses a vcbVBMIOSize boundary
+ * and we should treat this range of on-disk data as part of a new summary
+ * bit.
+ */
+ if (temp_summary > summary_bit) {
+ if (saw_free_blocks == 0) {
+ /* Mark the bit as totally consumed in the summary table */
+ hfs_set_summary (hfsmp, summary_bit, 1);
+ }
+ else {
+ /* Mark the bit as potentially free in summary table */
+ hfs_set_summary (hfsmp, summary_bit, 0);
+ }
+ last_marked = summary_bit;
+ /*
+			 * Any time we set the summary table, update our counter which tracks
+			 * the last bit that was fully marked in the summary table.
+ *
+ * Then reset our marker which says we haven't seen a free bit yet.
+ */
+ saw_free_blocks = 0;
+ summary_bit = temp_summary;
+ }
+ } /* End summary table conditions */
+
+ current_word = SWAP_BE32(buffer[word_index]);
+ /* Iterate through the word 1 bit at a time... */
+ for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
+ if (curAllocBlock >= last_bitmap_block) {
+ break;
+ }
+ u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
- err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
- if (err != noErr) goto Exit;
+ if (allocated) {
+ if (size != 0) {
+ if (readwrite) {
+ /* Insert the previously tracked range of free blocks to the trim list */
+ hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
+ }
+ add_free_extent_cache (hfsmp, free_offset, size);
+ size = 0;
+ free_offset = 0;
+ }
+ }
+ else {
+ /* Not allocated */
+ size++;
+ if (free_offset == 0) {
+					/* Start a new run of free space at curAllocBlock */
+ free_offset = curAllocBlock;
+ }
+ if (saw_free_blocks == 0) {
+ saw_free_blocks = 1;
+ }
+ }
+ } /* end for loop iterating through the word */
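+		/*
+		 * Note on the mask walk above: with kHighBitInWordMask == 0x80000000,
+		 * bit 0 tests 0x80000000, bit 1 tests 0x40000000, ..., and bit 31
+		 * tests 0x00000001, i.e. the big-endian word is scanned MSB first.
+		 */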
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ if (curAllocBlock < last_bitmap_block) {
+ word_index++;
+ }
+
+ } /* End while loop (iterates through last_bitmap_block) */
+
+
+ /*
+ * We've (potentially) completed our pass through this region of bitmap,
+ * but one thing we may not have done is updated that last summary bit for
+ * the last page we scanned, because we would have never transitioned across
+ * a vcbVBMIOSize boundary again. Check for that and update the last bit
+ * as needed.
+ *
+ * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
+ * for the region of bitmap on-disk that we were scanning. (it is one greater).
+ */
+ if ((curAllocBlock >= last_bitmap_block) &&
+ (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
+ uint32_t temp_summary;
+ /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
+ uint32_t temp_block = last_bitmap_block - 1;
+ error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
+ if (error) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
}
+ return EINVAL;
+ }
- // Readjust currentWord and wordsLeft
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
+ /* Did we already update this in the table? */
+ if (temp_summary > last_marked) {
+ if (saw_free_blocks == 0) {
+ hfs_set_summary (hfsmp, temp_summary, 1);
+ }
+ else {
+ hfs_set_summary (hfsmp, temp_summary, 0);
+ }
}
-#if DEBUG_BUILD
- if (*currentWord != 0) {
- panic("BlockMarkAllocated: blocks already allocated!");
+ }
+
+ /*
+ * We may have been tracking a range of free blocks that hasn't been inserted yet.
+ * Keep the logic for the TRIM and free extent separate from that of the summary
+ * table management even though they are closely linked.
+ */
+ if (size != 0) {
+ if (readwrite) {
+ hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
}
-#endif
- *currentWord = SWAP_BE32 (bitMask);
- numBlocks -= kBitsPerWord;
+ add_free_extent_cache (hfsmp, free_offset, size);
+ }
- ++currentWord; // move to next word
- --wordsLeft; // one less word left in this block
+ /*
+ * curAllocBlock represents the next block we need to scan when we return
+ * to this function.
+ */
+ *bitToScan = curAllocBlock;
+ ReleaseScanBitmapRange(blockRef);
+
+ return 0;
+
+}
+
+
+
+/*
+ * Compute the maximum I/O size to generate against the bitmap file
+ * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
+ *
+ * Inputs:
+ * hfsmp -- hfsmount to look at
+ *		bitmap_st	-- bit offset into the bitmap file to start scanning from
+ *
+ * Outputs:
+ * iosize -- iosize to generate.
+ *
+ * Returns:
+ * 0 on success; EINVAL otherwise
+ */
+static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
+
+ /*
+ * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
+ * with 32 bit math in this function.
+ */
+
+ uint32_t bitmap_len;
+ uint32_t remaining_bitmap;
+ uint32_t target_iosize;
+ uint32_t bitmap_off;
+
+ /* Is this bit index not word aligned? If so, immediately fail. */
+ if (bitmap_st % kBitsPerWord) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
+ }
+ return EINVAL;
}
-
- //
- // Allocate any remaining blocks.
- //
-
- if (numBlocks != 0) {
- bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
- if (wordsLeft == 0) {
- // Read in the next bitmap block
- startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
-
- buffer = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, true);
- if (err != noErr) goto Exit;
- err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
- if (err != noErr) goto Exit;
+ /* bitmap_off is in bytes, not allocation blocks/bits */
+ bitmap_off = bitmap_st / kBitsPerByte;
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
+ }
+ return EINVAL;
+ }
+
+ /*
+ * Also invalid if it's not at least aligned to HFS bitmap logical
+ * block boundaries. We don't have to emit an iosize that's an
+ * exact multiple of the VBMIOSize, but it must start on such
+ * a boundary.
+ *
+ * The vcbVBMIOSize may be SMALLER than the allocation block size
+ * on a FS with giant allocation blocks, but it will never be
+ * greater than it, so it should be safe to start I/O
+ * aligned on a VBMIOsize boundary.
+ */
+ if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
+ if (ALLOC_DEBUG) {
+ panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
+ }
+ return EINVAL;
+ }
+
+ /*
+ * Generate the total bitmap file length in bytes, then round up
+	 * that value to the end of the last allocation block, if needed (it
+	 * will probably be needed).  We won't scan past the last actual
+ * allocation block.
+ *
+ * Unless we're completing the bitmap scan (or bitmap < 1MB), we
+ * have to complete the I/O on VBMIOSize boundaries, but we can only read
+ * up until the end of the bitmap file.
+ */
+ bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
+
+ remaining_bitmap = bitmap_len - bitmap_off;
+
+ /*
+ * io size is the MIN of the maximum I/O we can generate or the
+ * remaining amount of bitmap.
+ */
+ target_iosize = MIN((MAXBSIZE), remaining_bitmap);
+ *iosize = target_iosize;
+
+ return 0;
+}
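+
+/*
+ * Worked example (assuming vcbVBMIOSize == 4096): a caller passing
+ * bitmap_st == 32768 yields bitmap_off == 32768 / 8 == 4096, which is both
+ * word aligned and VBMIOSize aligned, so *iosize is set to
+ * MIN(MAXBSIZE, bitmap_len - 4096).
+ */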
+
+
+
+
+/*
+ * This function is basically the same as hfs_isallocated, except it's designed for
+ * use with the red-black tree validation code. It assumes we're only checking whether
+ * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
+ * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
+ *
+ * This should not be called in general purpose scanning code.
+ */
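+/*
+ * Illustrative call pattern (a sketch; 'buffer' is a bitmap block the caller
+ * previously obtained via ReadBitmapBlock and still holds):
+ *
+ *	if (hfs_isallocated_scan(hfsmp, block, buffer)) {
+ *		// 'block' is marked in-use in the on-disk bitmap
+ *	}
+ */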
+int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
+
+ u_int32_t *currentWord; // Pointer to current word within bitmap block
+ u_int32_t bitMask; // Word with given bits already set (ready to test)
+ u_int32_t firstBit; // Bit index within word of first bit to allocate
+ u_int32_t numBits; // Number of bits in word to allocate
+ u_int32_t bitsPerBlock;
+ uintptr_t blockRef = 0;
+ u_int32_t wordsPerBlock;
+ u_int32_t numBlocks = 1;
+ u_int32_t *buffer = NULL;
+
+ int inuse = 0;
+ int error;
+
+
+ if (bp_buf) {
+ /* just use passed-in buffer if avail. */
+ buffer = bp_buf;
+ }
+ else {
+ /*
+ * Pre-read the bitmap block containing the first word of allocation
+ */
+ error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
+ HFS_ALLOC_IGNORE_TENTATIVE);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * Initialize currentWord, and wordsLeft.
+ */
+ u_int32_t wordIndexInBlock;
+
+ bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
+ wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+ wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+ currentWord = buffer + wordIndexInBlock;
+
+ /*
+ * First test any non word aligned bits.
+ */
+ firstBit = startingBlock % kBitsPerWord;
+ bitMask = kAllBitsSetInWord >> firstBit;
+ numBits = kBitsPerWord - firstBit;
+ if (numBits > numBlocks) {
+ numBits = numBlocks;
+ bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+ }
+ if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+ inuse = 1;
+ goto Exit;
+ }
+ numBlocks -= numBits;
+ ++currentWord;
+
+Exit:
+ if(bp_buf == NULL) {
+ if (buffer) {
+ (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+ }
+ }
+ return (inuse);
+}
+
+/*
+ * This function resets all of the data structures relevant to the
+ * free extent cache stored in the hfsmount struct.
+ *
+ * If we are using the red-black tree code then we need to account for the fact that
+ * we may encounter situations where we need to jettison the tree. If that is the
+ * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
+ * the free ext cache is zeroed before we start using it.
+ *
+ * We also reset and disable the cache when allocLimit is updated... which
+ * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
+ * It is independent of the type of allocator being used currently.
+ */
+void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
+{
+ int bytes;
+ void *freeExt;
+
+ if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+ lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+ /* reset Free Extent Count */
+ hfsmp->vcbFreeExtCnt = 0;
+
+ /* reset the actual array */
+ bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
+ freeExt = (void*)(hfsmp->vcbFreeExt);
+
+ bzero (freeExt, bytes);
+
+ lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+ if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+ return;
+}
+
+/*
+ * This function is used to inform the allocator if we have to effectively shrink
+ * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
+ *
+ * The bitmap lock must be held when calling this function. This function also modifies the
+ * allocLimit field in the hfs mount point structure in the general case.
+ *
+ * In the shrinking case, we'll have to remove all free extents from the red-black
+ * tree past the specified offset new_end_block. In the growth case, we'll have to force
+ * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
+ *
+ * new_end_block represents the total number of blocks available for allocation in the resized
+ * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
+ * will be out of the [0, n-1] range that is indexable in the bitmap.
+ *
+ * Returns 0 on success
+ * errno on failure
+ */
+__private_extern__
+u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
+
+ /*
+ * Update allocLimit to the argument specified
+ */
+ hfsmp->allocLimit = new_end_block;
+
+ /* Invalidate the free extent cache completely so that
+ * it does not have any extents beyond end of current
+ * volume.
+ */
+ ResetVCBFreeExtCache(hfsmp);
+
+ /* Force a rebuild of the summary table. */
+ (void) hfs_rebuild_summary (hfsmp);
+
+ // Delete any tentative ranges that are in the area we're shrinking
+ struct rl_entry *range, *next_range;
+ TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
+ rl_link, next_range) {
+ if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP)
+ hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+ }
+
+ return 0;
+}
+
+/*
+ * Remove an extent from the list of free extents.
+ *
+ * This is a low-level routine.  It does not handle overlaps or splitting;
+ * that is the responsibility of the caller.  The extent at the given index
+ * is removed, and any following extents in the list are shifted up.
+ *
+ * Inputs:
+ *	hfsmp		- mount point structure
+ *	index		- index of the extent to remove
+ *
+ * Result:
+ *	None.  (Under ALLOC_DEBUG an out-of-range index panics; otherwise it is
+ *	logged and ignored.)
+ */
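+/*
+ * For example (hypothetical cache contents): with vcbFreeExtCnt == 4,
+ * remove_free_extent_list(hfsmp, 1) memmoves the entries at indices 2..3
+ * down to indices 1..2 and leaves vcbFreeExtCnt == 3.
+ */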
+static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
+{
+ if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
+ if (ALLOC_DEBUG)
+ panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
+ else
+ printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
+ return;
+ }
+ int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
+ if (shift_count > 0) {
+ memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
+ }
+ hfsmp->vcbFreeExtCnt--;
+}
+
+
+/*
+ * Add an extent to the list of free extents.
+ *
+ * This is a low-level routine. It does not handle overlaps or coalescing;
+ * that is the responsibility of the caller. This routine *does* make
+ * sure that the extent it is adding is inserted in the correct location.
+ * If the list is full, this routine will handle either removing the last
+ * extent in the list to make room for the new extent, or ignoring the
+ * new extent if it is "worse" than the last extent in the list.
+ *
+ * Inputs:
+ * startBlock - Start of extent to add
+ * blockCount - Number of blocks in extent to add
+ *
+ * Result:
+ *	The index where the extent was inserted, or kMaxFreeExtents
+ * if the extent was not inserted (the list was full, and the extent
+ * being added was "worse" than everything in the list).
+ */
+static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+ uint32_t i;
+
+ /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
+ if (ALLOC_DEBUG) {
+ uint32_t endBlock = startBlock + blockCount;
+ for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+ if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
+ startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
+ continue;
}
-
- // Readjust currentWord and wordsLeft
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
+ panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
+ hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
}
-#if DEBUG_BUILD
- if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
- panic("BlockMarkAllocated: blocks already allocated!");
+ }
+
+ /* Figure out what index the new extent should be inserted at. */
+ for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+ if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+ /* The list is sorted by increasing offset. */
+ if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
+ break;
+ }
+ } else {
+ /* The list is sorted by decreasing size. */
+ if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
+ break;
+ }
+ }
+ }
+
+ /* When we get here, i is the index where the extent should be inserted. */
+ if (i == kMaxFreeExtents) {
+ /*
+ * The new extent is worse than anything already in the list,
+ * and the list is full, so just ignore the extent to be added.
+ */
+ return i;
+ }
+
+ /*
+ * Grow the list (if possible) to make room for an insert.
+ */
+ if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
+ hfsmp->vcbFreeExtCnt++;
+
+ /*
+ * If we'll be keeping any extents after the insert position, then shift them.
+ */
+ int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
+ if (shift_count > 0) {
+ memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
+ }
+
+ /* Finally, store the new extent at its correct position. */
+ hfsmp->vcbFreeExt[i].startBlock = startBlock;
+ hfsmp->vcbFreeExt[i].blockCount = blockCount;
+ return i;
+}
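+
+/*
+ * Insertion example (hypothetical, non-sparse volume, so the list is sorted
+ * by decreasing blockCount): inserting (startBlock 100, blockCount 50) into a
+ * list holding (10, 200) and (500, 30) shifts (500, 30) down one slot and
+ * stores the new extent at index 1.
+ */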
+
+
+/*
+ * Remove an entry from free extent cache after it has been allocated.
+ *
+ * This is a high-level routine. It handles removing a portion of a
+ * cached extent, potentially splitting it into two (if the cache was
+ * already full, throwing away the extent that would sort last). It
+ * also handles removing an extent that overlaps multiple extents in
+ * the cache.
+ *
+ * Inputs:
+ * hfsmp - mount point structure
+ * startBlock - starting block of the extent to be removed.
+ * blockCount - number of blocks of the extent to be removed.
+ */
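+/*
+ * Splitting example (hypothetical): if the cache holds (100, 50) and we
+ * remove (110, 10), the removed range falls strictly in the middle, so the
+ * cached extent is replaced by the head (100, 10) and the tail (120, 30).
+ */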
+static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+ u_int32_t i, insertedIndex;
+ u_int32_t currentStart, currentEnd, endBlock;
+ int extentsRemoved = 0;
+
+ if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
+
+ endBlock = startBlock + blockCount;
+
+ lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+ /*
+ * Iterate over all of the extents in the free extent cache, removing or
+ * updating any entries that overlap with the input extent.
+ */
+ for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+ currentStart = hfsmp->vcbFreeExt[i].startBlock;
+ currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
+
+ /*
+		 * If the current extent is entirely before or entirely after the
+		 * extent to be removed, then we keep it as-is.
+ */
+ if (currentEnd <= startBlock || currentStart >= endBlock) {
+ continue;
+ }
+
+ /*
+ * If the extent being removed entirely contains the current extent,
+ * then remove the current extent.
+ */
+ if (startBlock <= currentStart && endBlock >= currentEnd) {
+ remove_free_extent_list(hfsmp, i);
+
+ /*
+ * We just removed the extent at index i. The extent at
+ * index i+1 just got shifted to index i. So decrement i
+ * to undo the loop's "++i", and the next iteration will
+ * examine index i again, which contains the next extent
+ * in the list.
+ */
+ --i;
+ ++extentsRemoved;
+ continue;
+ }
+
+ /*
+ * If the extent being removed is strictly "in the middle" of the
+ * current extent, then we need to split the current extent into
+ * two discontiguous extents (the "head" and "tail"). The good
+ * news is that we don't need to examine any other extents in
+ * the list.
+ */
+ if (startBlock > currentStart && endBlock < currentEnd) {
+ remove_free_extent_list(hfsmp, i);
+ add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
+ add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
+ break;
+ }
+
+ /*
+ * The only remaining possibility is that the extent to be removed
+ * overlaps the start or end (but not both!) of the current extent.
+ * So we need to replace the current extent with a shorter one.
+ *
+ * The only tricky part is that the updated extent might be at a
+ * different index than the original extent. If the updated extent
+ * was inserted after the current extent, then we need to re-examine
+ * the entry at index i, since it now contains the extent that was
+ * previously at index i+1. If the updated extent was inserted
+ * before or at the same index as the removed extent, then the
+ * following extents haven't changed position.
+ */
+ remove_free_extent_list(hfsmp, i);
+ if (startBlock > currentStart) {
+ /* Remove the tail of the current extent. */
+ insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
+ } else {
+ /* Remove the head of the current extent. */
+ insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
+ }
+ if (insertedIndex > i) {
+ --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */
+ }
+ }
+
+ lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+ sanity_check_free_ext(hfsmp, 0);
+
+ if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
+
+ return;
+}
+
+
+/*
+ * Add an entry to free extent cache after it has been deallocated.
+ *
+ * This is a high-level routine. It will merge overlapping or contiguous
+ * extents into a single, larger extent.
+ *
+ * If the extent provided has blocks beyond current allocLimit, it is
+ * clipped to allocLimit (so that we won't accidentally find and allocate
+ * space beyond allocLimit).
+ *
+ * Inputs:
+ * hfsmp - mount point structure
+ * startBlock - starting block of the extent to be removed.
+ * blockCount - number of blocks of the extent to be removed.
+ *
+ * Returns:
+ * true - if the extent was added successfully to the list
+ * false - if the extent was not added to the list, maybe because
+ * the extent was beyond allocLimit, or is not best
+ * candidate to be put in the cache.
+ */
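+/*
+ * Coalescing example (hypothetical): adding (100, 10) while the cache holds
+ * (90, 10) and (110, 20) absorbs both neighbors, since each overlaps or is
+ * contiguous with the new extent, and a single merged extent (90, 40) is
+ * inserted.
+ */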
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+ Boolean retval = false;
+ uint32_t endBlock;
+ uint32_t currentEnd;
+ uint32_t i;
+
+ if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
+
+#if DEBUG
+ for (i = 0; i < 2; ++i) {
+ struct rl_entry *range;
+ TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+ assert(rl_overlap(range, startBlock,
+ startBlock + blockCount - 1) == RL_NOOVERLAP);
}
+ }
#endif
- *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
- // No need to update currentWord or wordsLeft
+ /* No need to add extent that is beyond current allocLimit */
+ if (startBlock >= hfsmp->allocLimit) {
+ goto out_not_locked;
+ }
+
+ /* If end of the free extent is beyond current allocLimit, clip the extent */
+ if ((startBlock + blockCount) > hfsmp->allocLimit) {
+ blockCount = hfsmp->allocLimit - startBlock;
+ }
+
+ lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+ /*
+ * Make a pass through the free extent cache, looking for known extents that
+ * overlap or are contiguous with the extent to be added. We'll remove those
+ * extents from the cache, and incorporate them into the new extent to be added.
+ */
+ endBlock = startBlock + blockCount;
+ for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
+ currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
+ if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
+ /* Extent i does not overlap and is not contiguous, so keep it. */
+ continue;
+ } else {
+ /* We need to remove extent i and combine it with the input extent. */
+ if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
+ startBlock = hfsmp->vcbFreeExt[i].startBlock;
+ if (currentEnd > endBlock)
+ endBlock = currentEnd;
+
+ remove_free_extent_list(hfsmp, i);
+ /*
+ * We just removed the extent at index i. The extent at
+ * index i+1 just got shifted to index i. So decrement i
+ * to undo the loop's "++i", and the next iteration will
+ * examine index i again, which contains the next extent
+ * in the list.
+ */
+ --i;
+ }
+ }
+	retval = (add_free_extent_list(hfsmp, startBlock, endBlock - startBlock) < kMaxFreeExtents);
+
+ lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+out_not_locked:
+ sanity_check_free_ext(hfsmp, 0);
+
+ if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
+
+ return retval;
+}
+
+/* Debug function to check if the free extent cache is good or not */
+static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
+{
+ u_int32_t i, j;
+
+ /* Do not do anything if debug is not on */
+ if (ALLOC_DEBUG == 0) {
+ return;
}
-Exit:
+ lck_spin_lock(&hfsmp->vcbFreeExtLock);
- if (buffer)
- (void)ReleaseBitmapBlock(vcb, blockRef, true);
+ if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
+ panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
- return err;
-}
+ /*
+ * Iterate the Free extent cache and ensure no entries are bogus or refer to
+ * allocated blocks.
+ */
+ for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
+ u_int32_t start, nblocks;
+
+ start = hfsmp->vcbFreeExt[i].startBlock;
+ nblocks = hfsmp->vcbFreeExt[i].blockCount;
+
+ /* Check if any of the blocks in free extent cache are allocated.
+ * This should not be enabled always because it might take
+ * very long for large extents that get added to the list.
+ *
+ * We have to drop vcbFreeExtLock while we call hfs_isallocated
+ * because it is going to do I/O. Note that the free extent
+ * cache could change. That's a risk we take when using this
+ * debugging code. (Another alternative would be to try to
+ * detect when the free extent cache changed, and perhaps
+ * restart if the list changed while we dropped the lock.)
+ */
+ if (check_allocated) {
+ lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+ if (hfs_isallocated(hfsmp, start, nblocks)) {
+ panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
+ hfsmp, i, start, nblocks);
+ }
+ lck_spin_lock(&hfsmp->vcbFreeExtLock);
+ }
+ /* Check if any part of the extent is beyond allocLimit */
+ if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
+ panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
+ hfsmp, i, start, nblocks, hfsmp->allocLimit);
+ }
-/*
-_______________________________________________________________________
+ /* Check if there are any duplicate start blocks */
+ for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
+ if (start == hfsmp->vcbFreeExt[j].startBlock) {
+ panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
+ hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
+ hfsmp->vcbFreeExt[j].blockCount);
+ }
+ }
-Routine: BlockMarkFree
+ /* Check if the entries are out of order */
+ if ((i+1) != hfsmp->vcbFreeExtCnt) {
+ if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+ /* sparse devices are sorted by starting block number (ascending) */
+ if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
+ panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
+ hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
+ hfsmp->vcbFreeExt[i+1].blockCount);
+ }
+ } else {
+ /* normally sorted by block count (descending) */
+ if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
+ panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
+ hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
+ hfsmp->vcbFreeExt[i+1].blockCount);
+ }
+ }
+ }
+ }
+ lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+}
-Function: Mark a contiguous group of blocks as free (clear in the
- bitmap). It assumes those bits are currently marked
- allocated (set in the bitmap).
+#define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit))
+#define kHighBitInDoubleWordMask 0x8000000000000000ull
-Inputs:
- vcb Pointer to volume where space is to be freed
- startingBlock First block number to mark as freed
- numBlocks Number of blocks to mark as freed
-_______________________________________________________________________
-*/
-__private_extern__
-OSErr BlockMarkFree(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- register UInt32 numBlocks)
+static int clzll(uint64_t x)
{
- OSErr err;
- register UInt32 *currentWord; // Pointer to current word within bitmap block
- register UInt32 wordsLeft; // Number of words left in this bitmap block
- register UInt32 bitMask; // Word with given bits already set (ready to OR in)
- UInt32 firstBit; // Bit index within word of first bit to allocate
- UInt32 numBits; // Number of bits in word to allocate
- UInt32 *buffer = NULL;
- UInt32 blockRef;
- UInt32 bitsPerBlock;
- UInt32 wordsPerBlock;
- // XXXdbg
- struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ if (x == 0)
+ return 64;
+ else
+ return __builtin_clzll(x);
+}
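+
+/*
+ * For example, clzll(0x00F0000000000000ull) == 8.  The explicit zero check
+ * matters because __builtin_clzll(0) is undefined; by convention we treat an
+ * all-clear double word as 64 leading zeros.
+ */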
- if (startingBlock + numBlocks > vcb->totalBlocks) {
- panic("hfs: block mark free: trying to free non-existent blocks (%d %d %d)\n",
- startingBlock, numBlocks, vcb->totalBlocks);
- }
+#if !HFS_ALLOC_TEST
+static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
+{
+ uint32_t start_bit;
+ uint32_t iosize = 0;
+ uint32_t byte_offset;
+ uint32_t last_bitmap_block;
+ int error;
+ struct hfsmount *hfsmp = bitmap_ctx->hfsmp;
+#if !HFS_ALLOC_TEST
+ uint64_t lock_elapsed;
+#endif
- //
- // Pre-read the bitmap block containing the first word of allocation
- //
- err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
- if (err != noErr) goto Exit;
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+ if (bitmap_ctx->bp)
+ ReleaseScanBitmapRange(bitmap_ctx->bp);
+
+ if (msleep(NULL, NULL, PINOD | PCATCH,
+ "hfs_fsinfo", NULL) == EINTR) {
+ return EINTR;
}
- //
- // Initialize currentWord, and wordsLeft.
- //
- {
- UInt32 wordIndexInBlock;
+#if !HFS_ALLOC_TEST
+ /*
+	 * Let someone else use the allocation bitmap after we've held it for more than HFS_FSINFO_MAX_LOCKHELD_TIME.
+ * lock_start is initialized in hfs_find_free_extents().
+ */
+ absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed);
+
+ if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) {
+
+ hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags);
- bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
- wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+ /* add tsleep here to force context switch and fairness */
+ tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1);
- wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
- currentWord = buffer + wordIndexInBlock;
- wordsLeft = wordsPerBlock - wordIndexInBlock;
- }
-
- //
- // If the first block to free doesn't start on a word
- // boundary in the bitmap, then treat that first word
- // specially.
- //
+ hfs_journal_lock(hfsmp);
- firstBit = startingBlock % kBitsPerWord;
- if (firstBit != 0) {
- bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
- numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
- if (numBits > numBlocks) {
- numBits = numBlocks; // entire allocation is inside this one word
- bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
- }
- if ((*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
- goto Corruption;
+ /* Flush the journal and wait for all I/Os to finish up */
+ error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
+ if (error) {
+ hfs_journal_unlock(hfsmp);
+ return error;
}
- *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
- numBlocks -= numBits; // adjust number of blocks left to free
- ++currentWord; // move to next word
- --wordsLeft; // one less word left in this block
- }
+ /*
+ * Take bitmap lock to ensure it is not being modified while journal is still held.
+ * Since we are reading larger than normal blocks from the bitmap, which
+ * might confuse other parts of the bitmap code using normal blocks, we
+ * take exclusive lock here.
+ */
+ bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
- //
- // Free whole words (32 blocks) at a time.
- //
+ bitmap_ctx->lock_start = mach_absolute_time();
- while (numBlocks >= kBitsPerWord) {
- if (wordsLeft == 0) {
- // Read in the next bitmap block
- startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
-
- buffer = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, true);
- if (err != noErr) goto Exit;
+ /* Release the journal lock */
+ hfs_journal_unlock(hfsmp);
- err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
- if (err != noErr) goto Exit;
+ /*
+ * Bitmap is read in large block size (up to 1MB),
+ * unlike the runtime which reads the bitmap in the
+ * 4K block size. If the bitmap is read by both ways
+ * at the same time, it can result in multiple buf_t with
+		 * different sizes and potentially cause data corruption.
+ * To avoid this, we invalidate all the existing buffers
+ * associated with the bitmap vnode.
+ */
+ error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+ if (error) {
+ /* hfs_systemfile_unlock will be called in the caller */
+ return error;
+ }
+ }
+#endif
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
- }
+ start_bit = bitmap_ctx->run_offset;
- // Readjust currentWord and wordsLeft
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
- }
- if (*currentWord != SWAP_BE32 (kAllBitsSetInWord)) {
- goto Corruption;
- }
- *currentWord = 0; // clear the entire word
- numBlocks -= kBitsPerWord;
-
- ++currentWord; // move to next word
- --wordsLeft; // one less word left in this block
+ if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) {
+ bitmap_ctx->chunk_end = 0;
+ bitmap_ctx->bp = NULL;
+ bitmap_ctx->bitmap = NULL;
+ return 0;
}
-
- //
- // Free any remaining blocks.
- //
-
- if (numBlocks != 0) {
- bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
- if (wordsLeft == 0) {
- // Read in the next bitmap block
- startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
-
- buffer = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, true);
- if (err != noErr) goto Exit;
- err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
- if (err != noErr) goto Exit;
+ assert(start_bit % 8 == 0);
- // XXXdbg
- if (hfsmp->jnl) {
- journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
- }
-
- // Readjust currentWord and wordsLeft
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
- }
- if ((*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
- goto Corruption;
- }
- *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
+ /*
+ * Compute how much I/O we should generate here.
+	 * hfs_scan_range_size will validate that the start bit,
+ * converted into a byte offset into the bitmap file,
+ * is aligned on a VBMIOSize boundary.
+ */
+ error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize);
+ if (error)
+ return error;
- // No need to update currentWord or wordsLeft
- }
+ assert(iosize != 0);
-Exit:
+ /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
+ byte_offset = start_bit / kBitsPerByte;
- if (buffer)
- (void)ReleaseBitmapBlock(vcb, blockRef, true);
+ /*
+ * When the journal replays blocks, it does so by writing directly to the disk
+ * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
+ * it also immediately re-reads and invalidates the range covered by the bp so
+ * it does not leave anything lingering in the cache (for iosize reasons).
+ *
+ * As such, it is safe to do large I/Os here with ReadBitmapRange.
+ *
+ * NOTE: It is not recommended, but it is possible to call the function below
+ * on sections of the bitmap that may be in core already as long as the pages are not
+ * dirty. In that case, we'd notice that something starting at that
+ * logical block of the bitmap exists in the metadata cache, and we'd check
+ * if the iosize requested is the same as what was already allocated for it.
+ * Odds are pretty good we're going to request something larger. In that case,
+ * we just free the existing memory associated with the buf and reallocate a
+ * larger range. This function should immediately invalidate it as soon as we're
+ * done scanning, so this shouldn't cause any coherency issues.
+ */
+ error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp);
+ if (error)
+ return error;
- return err;
+ /*
+ * At this point, we have a giant wired buffer that represents some portion of
+ * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
+ * bytes though, so clip our ending bit to what we actually read in.
+ */
+ last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte;
-Corruption:
-#if DEBUG_BUILD
- panic("BlockMarkFree: blocks not allocated!");
-#else
- printf("hfs: WARNING - blocks on volume %s not allocated!\n", vcb->vcbVN);
- vcb->vcbAtrb |= kHFSVolumeInconsistentMask;
- MarkVCBDirty(vcb);
- err = EIO;
- goto Exit;
-#endif
+ /* Cap the last block to the total number of blocks if required */
+ if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks)
+ last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks;
+
+ bitmap_ctx->chunk_current = 0; // new chunk of bitmap
+ bitmap_ctx->chunk_end = last_bitmap_block - start_bit;
+
+ return 0;
}
+#endif // !HFS_ALLOC_TEST
-/*
-_______________________________________________________________________
+// Returns number of contiguous bits set at start
+static int bit_count_set(void *bitmap, int start, int end)
+{
+ if (start == end)
+ return 0;
-Routine: BlockFindContiguous
+ assert(end > start);
-Function: Find a contiguous range of blocks that are free (bits
- clear in the bitmap). If a contiguous range of the
- minimum size can't be found, an error will be returned.
+ const int start_bit = start & 63;
+ const int end_bit = end & 63;
-Inputs:
- vcb Pointer to volume where space is to be allocated
- startingBlock Preferred first block of range
- endingBlock Last possible block in range + 1
- minBlocks Minimum number of blocks needed. Must be > 0.
- maxBlocks Maximum (ideal) number of blocks desired
- useMetaZone OK to dip into metadata allocation zone
+ uint64_t *p = (uint64_t *)bitmap + start / 64;
+ uint64_t x = ~OSSwapBigToHostInt64(*p);
-Outputs:
- actualStartBlock First block of range found, or 0 if error
- actualNumBlocks Number of blocks found, or 0 if error
+ if ((start & ~63) == (end & ~63)) {
+ // Start and end in same 64 bits
+ x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
+ return clzll(x) - start_bit;
+ }
-Returns:
- noErr Found at least minBlocks contiguous
- dskFulErr No contiguous space found, or all less than minBlocks
-_______________________________________________________________________
-*/
+ // Deal with initial unaligned bit
+ x &= BIT_RIGHT_MASK(start_bit);
-static OSErr BlockFindContiguous(
- ExtendedVCB *vcb,
- UInt32 startingBlock,
- UInt32 endingBlock,
- UInt32 minBlocks,
- UInt32 maxBlocks,
- Boolean useMetaZone,
- UInt32 *actualStartBlock,
- UInt32 *actualNumBlocks)
-{
- OSErr err;
- register UInt32 currentBlock; // Block we're currently looking at.
- UInt32 firstBlock; // First free block in current extent.
- UInt32 stopBlock; // If we get to this block, stop searching for first free block.
- UInt32 foundBlocks; // Number of contiguous free blocks in current extent.
- UInt32 *buffer = NULL;
- register UInt32 *currentWord;
- register UInt32 bitMask;
- register UInt32 wordsLeft;
- register UInt32 tempWord;
- UInt32 blockRef;
- UInt32 wordsPerBlock;
-
- if (!useMetaZone) {
- struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ if (x)
+ return clzll(x) - start_bit;
-
- if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
- (startingBlock <= hfsmp->hfs_metazone_end))
- startingBlock = hfsmp->hfs_metazone_end + 1;
+ // Go fast
+ ++p;
+ int count = 64 - start_bit;
+ int nquads = (end - end_bit - start - 1) / 64;
+
+ while (nquads--) {
+ if (*p != 0xffffffffffffffffull) {
+ x = ~OSSwapBigToHostInt64(*p);
+ return count + clzll(x);
+ }
+ ++p;
+ count += 64;
}
- if ((endingBlock - startingBlock) < minBlocks)
- {
- // The set of blocks we're checking is smaller than the minimum number
- // of blocks, so we couldn't possibly find a good range.
- goto DiskFull;
+ if (end_bit) {
+ x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
+ count += clzll(x);
}
- stopBlock = endingBlock - minBlocks + 1;
- currentBlock = startingBlock;
- firstBlock = 0;
+ return count;
+}
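+
+/*
+ * Worked example (hypothetical word): if the first double word of 'bitmap'
+ * holds the host-order value 0xF000000000000000 after OSSwapBigToHostInt64,
+ * then bit_count_set(bitmap, 0, 64) inverts it to 0x0FFFFFFFFFFFFFFF and
+ * returns clzll() of that, i.e. a run of 4 set bits at the start.
+ */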
- /*
- * Skip over metadata blocks.
- */
- if (!useMetaZone)
- currentBlock = NextBitmapBlock(vcb, currentBlock);
+/* Returns the length of a run of cleared bits:
+ * bitmap is a single chunk of memory being examined
+ * start: the start bit relative to the current buffer to be examined; start is inclusive.
+ * end: the end bit relative to the current buffer to be examined; end is not inclusive.
+ */
+static int bit_count_clr(void *bitmap, int start, int end)
+{
+ if (start == end)
+ return 0;
- //
- // Pre-read the first bitmap block.
- //
- err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
- if ( err != noErr ) goto ErrorExit;
+ assert(end > start);
- //
- // Figure out where currentBlock is within the buffer.
- //
- wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+ const int start_bit = start & 63;
+ const int end_bit = end & 63;
- wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
- currentWord = buffer + wordsLeft;
- wordsLeft = wordsPerBlock - wordsLeft;
-
- do
- {
- foundBlocks = 0;
-
- //============================================================
- // Look for a free block, skipping over allocated blocks.
- //============================================================
+ uint64_t *p = (uint64_t *)bitmap + start / 64;
+ uint64_t x = OSSwapBigToHostInt64(*p);
- //
- // Check an initial partial word (if any)
- //
- bitMask = currentBlock & kBitsWithinWordMask;
- if (bitMask)
- {
- tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
- bitMask = kHighBitInWordMask >> bitMask;
- while (tempWord & bitMask)
- {
- bitMask >>= 1;
- ++currentBlock;
- }
+ if ((start & ~63) == (end & ~63)) {
+ // Start and end in same 64 bits
+ x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
- // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
- if (bitMask)
- goto FoundUnused;
+ return clzll(x) - start_bit;
+ }
+
+ // Deal with initial unaligned bit
+ x &= BIT_RIGHT_MASK(start_bit);
+
+ if (x)
+ return clzll(x) - start_bit;
+
+ // Go fast
+ ++p;
+ int count = 64 - start_bit;
+ int nquads = (end - end_bit - start - 1) / 64;
+
+ while (nquads--) {
+ if (*p) {
+ x = OSSwapBigToHostInt64(*p);
+ return count + clzll(x);
+ }
+ ++p;
+ count += 64;
+ }
+
+ if (end_bit) {
+ x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
+
+ count += clzll(x);
+ }
+
+ return count;
+}
+
+#if !HFS_ALLOC_TEST
+static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set)
+{
+ uint32_t end, start_summary_bit, end_summary_bit;
+ errno_t error = 0;
+
+ if (count == 0)
+ goto out;
- // Didn't find any unused bits, so we're done with this word.
- ++currentWord;
- --wordsLeft;
- }
+ if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE))
+ return 0;
- //
- // Check whole words
- //
- while (currentBlock < stopBlock)
- {
- // See if it's time to read another block.
- if (wordsLeft == 0)
- {
- buffer = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, false);
- if (err != noErr) goto ErrorExit;
+ if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) {
+ error = EINVAL;
+ goto out;
+ }
- /*
- * Skip over metadata blocks.
- */
- if (!useMetaZone) {
- currentBlock = NextBitmapBlock(vcb, currentBlock);
- if (currentBlock >= stopBlock) {
- goto LoopExit;
- }
- }
+ end = start + count - 1;
+ if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) {
+ error = EINVAL;
+ goto out;
+ }
- err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
- if ( err != noErr ) goto ErrorExit;
-
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
- }
-
- // See if any of the bits are clear
- if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
- {
- // Figure out which bit is clear
- bitMask = kHighBitInWordMask;
- while (tempWord & bitMask)
- {
- bitMask >>= 1;
- ++currentBlock;
- }
-
- break; // Found the free bit; break out to FoundUnused.
- }
+	// If the summary table bit was already updated for a free block, leave it alone.
+ if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set)
+ start_summary_bit++;
- // Keep looking at the next word
- currentBlock += kBitsPerWord;
- ++currentWord;
- --wordsLeft;
- }
+ for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++)
+ hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set);
-FoundUnused:
- // Make sure the unused bit is early enough to use
- if (currentBlock >= stopBlock)
- {
- break;
- }
+ if (!set)
+ bitmap_ctx->last_free_summary_bit = end_summary_bit;
- // Remember the start of the extent
- firstBlock = currentBlock;
+out:
+ return error;
- //============================================================
- // Count the number of contiguous free blocks.
- //============================================================
+}
+#endif //!HFS_ALLOC_TEST
- //
- // Check an initial partial word (if any)
- //
- bitMask = currentBlock & kBitsWithinWordMask;
- if (bitMask)
- {
- tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
- bitMask = kHighBitInWordMask >> bitMask;
- while (bitMask && !(tempWord & bitMask))
- {
- bitMask >>= 1;
- ++currentBlock;
- }
+/*
+ * Read in chunks of the bitmap into memory, and find a run of cleared/set bits;
+ * the run can extend across chunk boundaries.
+ * bit_count_clr can be passed to get a run of cleared bits.
+ * bit_count_set can be passed to get a run of set bits.
+ */
+static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int, int), uint32_t *bit_count)
+{
+ int count;
+ errno_t error = 0;
- // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
- if (bitMask)
- goto FoundUsed;
+ *bit_count = 0;
- // Didn't find any used bits, so we're done with this word.
- ++currentWord;
- --wordsLeft;
+ do {
+ if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) {
+ if ((error = get_more_bits(bitmap_ctx)) != 0)
+ goto out;
}
-
- //
- // Check whole words
- //
- while (currentBlock < endingBlock)
- {
- // See if it's time to read another block.
- if (wordsLeft == 0)
- {
- buffer = NULL;
- err = ReleaseBitmapBlock(vcb, blockRef, false);
- if (err != noErr) goto ErrorExit;
- /*
- * Skip over metadata blocks.
- */
- if (!useMetaZone) {
- UInt32 nextBlock;
+ if (bitmap_ctx->chunk_end == 0)
+ break;
- nextBlock = NextBitmapBlock(vcb, currentBlock);
- if (nextBlock != currentBlock) {
- goto LoopExit; /* allocation gap, so stop */
- }
- }
+ count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end);
- err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
- if ( err != noErr ) goto ErrorExit;
-
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
- }
-
- // See if any of the bits are set
- if ((tempWord = SWAP_BE32(*currentWord)) != 0)
- {
- // Figure out which bit is set
- bitMask = kHighBitInWordMask;
- while (!(tempWord & bitMask))
- {
- bitMask >>= 1;
- ++currentBlock;
- }
-
- break; // Found the used bit; break out to FoundUsed.
- }
+ bitmap_ctx->run_offset += count;
+ bitmap_ctx->chunk_current += count;
+ *bit_count += count;
- // Keep looking at the next word
- currentBlock += kBitsPerWord;
- ++currentWord;
- --wordsLeft;
-
- // If we found at least maxBlocks, we can quit early.
- if ((currentBlock - firstBlock) >= maxBlocks)
- break;
- }
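+		/*
+		 * Loop as long as the run filled the whole chunk (it may continue
+		 * into the next chunk) and the run has not ended (count != 0).
+		 */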
+ } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count);
-FoundUsed:
- // Make sure we didn't run out of bitmap looking for a used block.
- // If so, pin to the end of the bitmap.
- if (currentBlock > endingBlock)
- currentBlock = endingBlock;
+out:
+ return error;
- // Figure out how many contiguous free blocks there were.
- // Pin the answer to maxBlocks.
- foundBlocks = currentBlock - firstBlock;
- if (foundBlocks > maxBlocks)
- foundBlocks = maxBlocks;
- if (foundBlocks >= minBlocks)
- break; // Found what we needed!
+}
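+
+/*
+ * Illustration (added commentary, not part of the original change): a
+ * counting callback compatible with hfs_bit_count() returns the length of
+ * the run of matching bits starting at 'start', stopping at 'end'.  Below
+ * is a naive, bit-at-a-time sketch of a clear-bit counter; the real
+ * bit_count_clr() is defined elsewhere in this file and works a word at
+ * a time.
+ */
+#if 0
+static int example_bit_count_clr(void *bitmap, int start, int end)
+{
+	const uint8_t *bytes = bitmap;
+	int bit = start;
+
+	/* Count consecutive clear (free) bits; the bitmap is big-endian, MSB first */
+	while (bit < end && !(bytes[bit / 8] & (0x80 >> (bit % 8))))
+		bit++;
+
+	return bit - start;	/* length of the clear run beginning at 'start' */
+}
+#endif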
- // This free chunk wasn't big enough. Try inserting it into the free extent cache in case
- // the allocation wasn't forced contiguous.
- tempWord = vcb->vcbFreeExtCnt;
- if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < foundBlocks)
- --tempWord;
- if (tempWord < kMaxFreeExtents)
- {
- // We're going to add this extent. Bubble any smaller extents down in the list.
- while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < foundBlocks)
- {
- vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1];
- --tempWord;
- }
- vcb->vcbFreeExt[tempWord].startBlock = firstBlock;
- vcb->vcbFreeExt[tempWord].blockCount = foundBlocks;
-
- if (vcb->vcbFreeExtCnt < kMaxFreeExtents)
- ++vcb->vcbFreeExtCnt;
- }
- } while (currentBlock < stopBlock);
-LoopExit:
+// Counts the number of consecutive clear bits at the current scan position; result in *count
+static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count)
+{
+ return hfs_bit_count(bitmap_ctx, bit_count_clr, count);
+}
- // Return the outputs.
- if (foundBlocks < minBlocks)
- {
-DiskFull:
- err = dskFulErr;
-ErrorExit:
- *actualStartBlock = 0;
- *actualNumBlocks = 0;
- }
- else
- {
- err = noErr;
- *actualStartBlock = firstBlock;
- *actualNumBlocks = foundBlocks;
- }
-
- if (buffer)
- (void) ReleaseBitmapBlock(vcb, blockRef, false);
+// Counts the number of consecutive set bits at the current scan position; result in *count
+static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count)
+{
+ return hfs_bit_count(bitmap_ctx, bit_count_set, count);
+}
- return err;
+static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx)
+{
+ return bitmap_ctx->run_offset;
}
/*
- * Test to see if any blocks in a range are allocated.
- *
- * The journal or allocation file lock must be held.
+ * Perform a full scan of the bitmap file.
+ * Note: during the scan of the bitmap file, it may drop and reacquire the
+ * bitmap lock to let someone else use the bitmap for fairness.
+ * Currently it is used by HFS_GET_FSINFO statistics gathering, which
+ * is run while other processes might perform HFS operations.
*/
-__private_extern__
-int
-hfs_isallocated(struct hfsmount *hfsmp, u_long startingBlock, u_long numBlocks)
+
+errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
+ void (*callback)(void *data, off_t free_extent_size), void *callback_arg)
{
- UInt32 *currentWord; // Pointer to current word within bitmap block
- UInt32 wordsLeft; // Number of words left in this bitmap block
- UInt32 bitMask; // Word with given bits already set (ready to test)
- UInt32 firstBit; // Bit index within word of first bit to allocate
- UInt32 numBits; // Number of bits in word to allocate
- UInt32 *buffer = NULL;
- UInt32 blockRef;
- UInt32 bitsPerBlock;
- UInt32 wordsPerBlock;
- int inuse = 0;
- int error;
+ struct bitmap_context bitmap_ctx;
+ uint32_t count;
+ errno_t error = 0;
+
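+	/*
+	 * The scan below rebuilds the summary table as it goes, so make sure
+	 * one exists before starting.
+	 */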
+ if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
+ error = hfs_init_summary(hfsmp);
+ if (error)
+ return error;
+ }
- /*
- * Pre-read the bitmap block containing the first word of allocation
- */
- error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
- if (error)
- return (error);
+ bzero(&bitmap_ctx, sizeof(struct bitmap_context));
/*
- * Initialize currentWord, and wordsLeft.
+ * The journal maintains list of recently deallocated blocks to
+ * issue DKIOCUNMAPs when the corresponding journal transaction is
+ * flushed to the disk. To avoid any race conditions, we only
+ * want one active trim list. Therefore we make sure that the
+ * journal trim list is sync'ed, empty, and not modifiable for
+ * the duration of our scan.
+ *
+ * Take the journal lock before flushing the journal to the disk.
+ * We keep holding the journal lock until we acquire the
+ * bitmap lock, to make sure that no new journal transactions can
+ * start. This ensures that the journal trim list is not
+ * modified between the journal flush and taking the bitmap lock.
+ * We can release the journal lock after we acquire the bitmap
+ * lock as it will prevent any further block deallocations.
*/
- {
- UInt32 wordIndexInBlock;
-
- bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
- wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+ hfs_journal_lock(hfsmp);
- wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
- currentWord = buffer + wordIndexInBlock;
- wordsLeft = wordsPerBlock - wordIndexInBlock;
+ /* Flush the journal and wait for all I/Os to finish up */
+ error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
+ if (error) {
+ hfs_journal_unlock(hfsmp);
+ return error;
}
-
+
/*
- * First test any non word aligned bits.
+	 * Take the bitmap lock to ensure it is not being modified.
+	 * Since we are reading larger-than-normal blocks from the bitmap, which
+	 * might confuse other parts of the bitmap code that use normal blocks, we
+	 * take an exclusive lock here.
*/
- firstBit = startingBlock % kBitsPerWord;
- if (firstBit != 0) {
- bitMask = kAllBitsSetInWord >> firstBit;
- numBits = kBitsPerWord - firstBit;
- if (numBits > numBlocks) {
- numBits = numBlocks;
- bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
- }
- if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
- inuse = 1;
- goto Exit;
- }
- numBlocks -= numBits;
- ++currentWord;
- --wordsLeft;
- }
+ bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+#if !HFS_ALLOC_TEST
+ bitmap_ctx.lock_start = mach_absolute_time();
+#endif
+
+ /* Release the journal lock */
+ hfs_journal_unlock(hfsmp);
/*
- * Test whole words (32 blocks) at a time.
+	 * The bitmap is read in large blocks (up to 1MB),
+	 * unlike the runtime, which reads the bitmap in
+	 * 4K blocks. If the bitmap is read both ways
+	 * at the same time, it can result in multiple buf_t's with
+	 * different sizes and potentially cause data corruption.
+ * To avoid this, we invalidate all the existing buffers
+ * associated with the bitmap vnode.
*/
- while (numBlocks >= kBitsPerWord) {
- if (wordsLeft == 0) {
- /* Read in the next bitmap block. */
- startingBlock += bitsPerBlock;
-
- buffer = NULL;
- error = ReleaseBitmapBlock(hfsmp, blockRef, false);
- if (error) goto Exit;
-
- error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
- if (error) goto Exit;
+ error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+ if (error)
+ goto out;
- /* Readjust currentWord and wordsLeft. */
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
- }
- if (*currentWord != 0) {
- inuse = 1;
- goto Exit;
- }
- numBlocks -= kBitsPerWord;
- ++currentWord;
- --wordsLeft;
- }
-
/*
- * Test any remaining blocks.
+	 * Get the list of all free extent ranges. For every run of clear bits
+	 * found during the scan, the caller-supplied callback
+	 * (hfs_fsinfo_data_add() in the HFS_GET_FSINFO case) is invoked to
+	 * account for the free extent.
*/
- if (numBlocks != 0) {
- bitMask = ~(kAllBitsSetInWord >> numBlocks);
- if (wordsLeft == 0) {
- /* Read in the next bitmap block */
- startingBlock += bitsPerBlock;
-
- buffer = NULL;
- error = ReleaseBitmapBlock(hfsmp, blockRef, false);
- if (error) goto Exit;
+ bitmap_ctx.hfsmp = hfsmp;
+ bitmap_ctx.run_offset = 0;
- error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
- if (error) goto Exit;
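+	/*
+	 * Walk the whole bitmap, alternating between a run of clear (free)
+	 * bits and a run of set (allocated) bits, reporting each free run
+	 * to the callback and updating the summary table for both.
+	 */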
+ while (bitmap_ctx.run_offset < hfsmp->totalBlocks) {
- currentWord = buffer;
- wordsLeft = wordsPerBlock;
- }
- if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
- inuse = 1;
- goto Exit;
- }
+ uint32_t start = hfs_bit_offset(&bitmap_ctx);
+
+ if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0)
+ goto out;
+
+ if (count)
+ callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize));
+
+ if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0)
+ goto out;
+
+ start = hfs_bit_offset(&bitmap_ctx);
+
+ if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0)
+ goto out;
+
+ if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0)
+ goto out;
}
-Exit:
- if (buffer) {
- (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+
+out:
+ if (bitmap_ctx.lockflags) {
+ hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags);
}
- return (inuse);
-}
+ return error;
+}
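+
+/*
+ * Usage illustration (added commentary, not part of the original change):
+ * a hypothetical caller can total up free space by summing the extent
+ * sizes reported through the callback.
+ */
+#if 0
+static void total_free_cb(void *data, off_t free_extent_size)
+{
+	/* Accumulate the size, in bytes, of every free extent reported */
+	*(off_t *)data += free_extent_size;
+}
+
+static errno_t example_total_free(struct hfsmount *hfsmp, off_t *total)
+{
+	*total = 0;
+	return hfs_find_free_extents(hfsmp, total_free_cb, total);
+}
+#endif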