]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/hfs/hfscommon/Misc/VolumeAllocation.c
xnu-3247.10.11.tar.gz
[apple/xnu.git] / bsd / hfs / hfscommon / Misc / VolumeAllocation.c
index 157b7fb57cbfb9fced361950019c5e15cc4c6183..612171809aac16b49c987a6740b0bd531f8768e4 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
        File:           VolumeAllocation.c
 
        Version:        HFS Plus 1.0
 
-       Copyright:      © 1996-2001 by Apple Computer, Inc., all rights reserved.
+       Copyright:      � 1996-2009 by Apple Computer, Inc., all rights reserved.
 
 */
 
 /*
 Public routines:
-       BlockAllocate
+       BlockAllocate / hfs_block_alloc
                                        Allocate space on a volume.  Can allocate space contiguously.
                                        If not contiguous, then allocation may be less than what was
                                        asked for.  Returns the starting block number, and number of
-                                       blocks.  (Will only do a single extent???)
+                                       blocks.  It will only return a single extent.
+
        BlockDeallocate
                                        Deallocate a contiguous run of allocation blocks.
-
-
-Internal routines:
+       BlockMarkAllocated
+                                       Exported wrapper to mark blocks as in-use.  This will correctly determine
+                                       whether or not the red-black tree is enabled and call the appropriate function 
+                                       if applicable.
        BlockMarkFree
+                                       Exported wrapper to mark blocks as freed.  This will correctly determine whether or
+                                       not the red-black tree is enabled and call the appropriate function if applicable.
+
+       ResetVCBFreeExtCache
+                                       Since the red-black tree obviates the need to maintain the free extent cache, we do
+                                       not update it if the tree is also live.  As a result, if we ever need to destroy the trees
+                                       we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
+                                       bitmap scanning allocator.
+                                       We also reset and disable the free extent cache when volume resizing is 
+                                       in flight.
+       UpdateAllocLimit 
+                                       Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
+                                       allocations from occupying space in the region we are modifying during a filesystem resize.  
+                                       At other times, it should be consistent with the total number of allocation blocks in the 
+                                       filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
+                                       know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
+                                       number of items in the tree that we can allocate from.
+
+       ScanUnmapBlocks 
+                                       Traverse the entire allocation bitmap.  Potentially issue DKIOCUNMAPs to the device as it 
+                                       tracks unallocated ranges when iterating the volume bitmap.  Additionally, build up the in-core
+                                       summary table of the allocation bitmap.
+Internal routines:
+       BlockMarkFreeInternal
                                        Mark a contiguous range of blocks as free.  The corresponding
-                                       bits in the volume bitmap will be cleared.
-       BlockMarkAllocated
+                                       bits in the volume bitmap will be cleared.  This will actually do the work
+                                       of modifying the bitmap for us.
+                                       
+       BlockMarkAllocatedInternal
                                        Mark a contiguous range of blocks as allocated.  The cor-
                                        responding bits in the volume bitmap are set.  Also tests to see
-                                       if any of the blocks were previously unallocated.
-       FindContiguous
+                                       if any of the blocks were previously unallocated.  
+       BlockFindContiguous
                                        Find a contiguous range of blocks of a given size.  The caller
                                        specifies where to begin the search (by block number).  The
-                                       block number of the first block in the range is returned.
-       BlockAllocateAny
+                                       block number of the first block in the range is returned.  This is only
+                                       called by the bitmap scanning logic as the red-black tree should be able
+                                       to do this internally by searching its tree. 
+       BlockFindAny
                                        Find and allocate a contiguous range of blocks up to a given size.  The
                                        first range of contiguous free blocks found are allocated, even if there
                                        are fewer blocks than requested (and even if a contiguous range of blocks
                                        of the given size exists elsewhere).
-       BlockAllocateContig
-                                       Find and allocate a contiguous range of blocks of a given size.  If
-                                       a contiguous range of free blocks of the given size isn't found, then
-                                       the allocation fails (i.e. it is "all or nothing").
-
-       BlockAllocateKnown
+       BlockFindAnyBitmap
+                                       Finds a range of blocks per the above requirements without using the 
+                                       Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
+                                       any valid range of free space needed.
+       BlockFindContig
+                                       Find a contiguous range of blocks of a given size.
+                                       If the minimum cannot be satisfied, nothing is
+                                       returned.
+       BlockFindKnown
                                        Try to allocate space from known free space in the volume's
                                        free extent cache.
-
        ReadBitmapBlock
                                        Given an allocation block number, read the bitmap block that
                                        contains that allocation block into a caller-supplied buffer.
 
        ReleaseBitmapBlock
                                        Release a bitmap block back into the buffer cache.
+       
+       ReadBitmapRange
+                                       Given an allocation block number, read a range of bitmap that
+                                       must begin at that allocation block into a caller supplied buffer.
+
+       ReleaseBitmapRange
+                                       Release and invalidate a buf_t corresponding to the bitmap
+                                       back into the UBC in order to prevent coherency issues.
+
+       remove_free_extent_cache
+                                       Remove an extent from the free extent cache.  Handles overlaps
+                                       with multiple extents in the cache, and handles splitting an
+                                       extent in the cache if the extent to be removed is in the middle
+                                       of a cached extent.
+       
+       add_free_extent_cache
+                                       Add an extent to the free extent cache.  It will merge the
+                                       input extent with extents already in the cache.
+       CheckUnmappedBytes
+                                       Check whether or not the current transaction
+                                       has allocated blocks that were recently freed. This may have data safety implications.
+
+
+Debug/Test Routines
+       hfs_isallocated
+                                       Test to see if any blocks in a range are allocated.  Journal or
+                                       allocation file lock must be held.
+       hfs_isallocated_scan
+                                       Test to see if any blocks in a range are allocated.  Releases and
+                                       invalidates the block used when finished.
+        
+Optimization Routines 
+       hfs_alloc_scan_block
+                                       Given a starting allocation block number, figures out which physical block contains that 
+                                       allocation block's bit, and scans it from the starting bit until either the ending bit or
+                                       the end of the block.  Free space extents are inserted into the appropriate red-black tree.
+                                       
 */
 
-#include "../../hfs_macos_defs.h"
 
 #include <sys/types.h>
 #include <sys/buf.h>
-#include <sys/systm.h>
 
+#if !HFS_ALLOC_TEST
+
+#include "../../hfs_macos_defs.h"
+#include <sys/systm.h>
+#include <sys/ubc.h>
+#include <kern/kalloc.h>
+/* For VM Page size */
+#include <libkern/libkern.h>
+#include <vfs/vfs_journal.h>
 #include "../../hfs.h"
+#include "../../hfs_endian.h"
+#include "../headers/FileMgrInternal.h"
+
+#endif // !HFS_ALLOC_TEST
+
+#include <sys/sysctl.h>
+#include <sys/disk.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+
 #include "../../hfs_dbg.h"
 #include "../../hfs_format.h"
-#include "../../hfs_endian.h"
+#include "../../hfs_kdebug.h"
+#include "../../rangelist.h"
+#include "../../hfs_extents.h"
 
-#include "../headers/FileMgrInternal.h"
+/* Headers for unmap-on-mount support */
+#include <sys/disk.h>
 
+#ifndef CONFIG_HFS_TRIM
+#define CONFIG_HFS_TRIM 0
+#endif
+
+/*
+ * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
+ * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
+ * disabled by default because there can be a lot of these events,
+ * and we don't want to overwhelm the kernel debug buffer.  If you
+ * want to watch these events in particular, just set the sysctl.)
+ */
+static int hfs_kdebug_allocation = 0;
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
+SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
+SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
+enum {
+       /*
+        * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
+        * BlockDeallocate, including the internal BlockAllocateXxx
+        * routines so we can see how an allocation was satisfied.
+        *
+        * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
+        * free extent cache.
+        *
+        * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
+        *
+        * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
+        * or clearing bits, scanning the bitmap).
+        */
+       HFSDBG_ALLOC_ENABLED            = 1,
+       HFSDBG_EXT_CACHE_ENABLED        = 2,
+       HFSDBG_UNMAP_ENABLED            = 4,
+       HFSDBG_BITMAP_ENABLED           = 8
+};
 
 enum {
        kBytesPerWord                   =       4,
@@ -101,1648 +237,6014 @@ enum {
 #define kHighBitInWordMask     0x80000000ul
 #define kAllBitsSetInWord      0xFFFFFFFFul
 
+#define HFS_MIN_SUMMARY_BLOCKSIZE 4096
+
+#define ALLOC_DEBUG 0
 
 static OSErr ReadBitmapBlock(
-       ExtendedVCB             *vcb,
-       UInt32                  bit,
-       UInt32                  **buffer,
-       UInt32                  *blockRef);
+               ExtendedVCB             *vcb,
+               u_int32_t               bit,
+               u_int32_t               **buffer,
+               uintptr_t               *blockRef,
+               hfs_block_alloc_flags_t flags);
 
 static OSErr ReleaseBitmapBlock(
-       ExtendedVCB             *vcb,
-       UInt32                  blockRef,
-       Boolean                 dirty);
-
-static OSErr BlockAllocateAny(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       UInt32                  endingBlock,
-       UInt32                  maxBlocks,
-       Boolean                 useMetaZone,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks);
-
-static OSErr BlockAllocateContig(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       UInt32                  minBlocks,
-       UInt32                  maxBlocks,
-       Boolean                 useMetaZone,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks);
+               ExtendedVCB             *vcb,
+               uintptr_t               blockRef,
+               Boolean                 dirty);
+
+static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
+                                                                HFSPlusExtentDescriptor *extent,
+                                                                hfs_block_alloc_flags_t flags,
+                                                                hfs_alloc_extra_args_t *ap);
+
+static OSErr BlockFindAny(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               endingBlock,
+               u_int32_t               maxBlocks,
+               hfs_block_alloc_flags_t flags,
+               Boolean                 trustSummary,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks);
+
+static OSErr BlockFindAnyBitmap(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               endingBlock,
+               u_int32_t               maxBlocks,
+               hfs_block_alloc_flags_t flags,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks);
+
+static OSErr BlockFindContig(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               minBlocks,
+               u_int32_t               maxBlocks,
+               hfs_block_alloc_flags_t flags,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks);
 
 static OSErr BlockFindContiguous(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       UInt32                  endingBlock,
-       UInt32                  minBlocks,
-       UInt32                  maxBlocks,
-       Boolean                 useMetaZone,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks);
-
-static OSErr BlockAllocateKnown(
-       ExtendedVCB             *vcb,
-       UInt32                  maxBlocks,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks);
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               endingBlock,
+               u_int32_t               minBlocks,
+               u_int32_t               maxBlocks,
+               Boolean                 useMetaZone,
+               Boolean                 trustSummary,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks,
+               hfs_block_alloc_flags_t flags);
+
+static OSErr BlockFindKnown(
+               ExtendedVCB             *vcb,
+               u_int32_t               maxBlocks,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks);
+
+static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
+                                                               HFSPlusExtentDescriptor *extent,
+                                                               uint32_t max_blocks,
+                                                               hfs_block_alloc_flags_t flags);
+
+static OSErr BlockMarkAllocatedInternal (
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               numBlocks,
+               hfs_block_alloc_flags_t flags);
+
+static OSErr BlockMarkFreeInternal(
+               ExtendedVCB     *vcb,
+               u_int32_t       startingBlock,
+               u_int32_t       numBlocks, 
+               Boolean         do_validate);
+
+
+static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
+               uint32_t **buffer, struct buf **blockRef);
+
+static OSErr ReleaseScanBitmapRange( struct buf *bp );
+
+static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset, 
+               u_int32_t numBlocks, struct jnl_trim_list *list);
+
+static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
+
+static int hfs_alloc_scan_range(struct hfsmount *hfsmp, 
+               u_int32_t startbit, 
+               u_int32_t *bitToScan,
+               struct jnl_trim_list *list);
+
+static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
+static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
+
+/* Bitmap Re-use Detection */
+static inline int extents_overlap (uint32_t start1, uint32_t len1,
+               uint32_t start2, uint32_t len2) {
+       return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) );
+}
+
+
+int hfs_isallocated_scan (struct hfsmount *hfsmp,
+               u_int32_t startingBlock,
+               u_int32_t *bp_buf);
+
+/* Summary Table Functions */
+static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
+static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
+static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
+static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
+static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
+static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
+static int hfs_rebuild_summary (struct hfsmount *hfsmp);
+
+#if 0
+static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
+#endif
+
+/* Used in external mount code to initialize the summary table */
+int hfs_init_summary (struct hfsmount *hfsmp);
+
+#if ALLOC_DEBUG 
+void hfs_validate_summary (struct hfsmount *hfsmp);
+#endif
+
+
+/* Functions for manipulating free extent cache */
+static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
+static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
+
+static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
+
+/* Functions for getting free exents */
+
+typedef struct bitmap_context {
+       void                    *bitmap;                                // current bitmap chunk
+       uint32_t                run_offset;                             // offset (in bits) from start of bitmap to start of current run
+       uint32_t                chunk_current;                  // next bit to scan in the chunk
+       uint32_t                chunk_end;                              // number of valid bits in this chunk
+       struct hfsmount *hfsmp;
+       struct buf              *bp;
+       uint32_t                last_free_summary_bit;  // last marked free summary bit
+       int                             lockflags;
+       uint64_t                lock_start;
+} bitmap_context_t;
+
 
+static errno_t get_more_bits(bitmap_context_t *bitmap_ctx);
+static int bit_count_set(void *bitmap, int start, int end);
+static int bit_count_clr(void *bitmap, int start, int end);
+static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count);
+static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count);
+static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count);
+static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set);
+static int clzll(uint64_t x);
 
+#if ALLOC_DEBUG
 /*
-;________________________________________________________________________________
-;
-; Routine:        BlkAlloc
-;
-; Function:    Allocate space on a volume.     If contiguous allocation is requested,
-;                         at least the requested number of bytes will be allocated or an
-;                         error will be returned.      If contiguous allocation is not forced,
-;                         the space will be allocated at the first free fragment following
-;                         the requested starting allocation block.  If there is not enough
-;                         room there, a block of less than the requested size will be
-;                         allocated.
-;
-;                         If the requested starting block is 0 (for new file allocations),
-;                         the volume's allocation block pointer will be used as a starting
-;                         point.
-;
-; Input Arguments:
-;       vcb                     - Pointer to ExtendedVCB for the volume to allocate space on
-;       fcb                     - Pointer to FCB for the file for which storage is being allocated
-;       startingBlock   - Preferred starting allocation block, 0 = no preference
-;       forceContiguous - Force contiguous flag - if bit 0 set (NE), allocation is contiguous
-;                                         or an error is returned
-;       useMetaZone  - 
-;       minBlocks       - Number of blocks requested.  If the allocation is non-contiguous,
-;                                         less than this may actually be allocated
-;       maxBlocks       - The maximum number of blocks to allocate.  If there is additional free
-;                                         space after bytesRequested, then up to maxBlocks bytes should really
-;                                         be allocated.  (Used by ExtendFileC to round up allocations to a multiple
-;                                         of the file's clump size.)
-;
-; Output:
-;       (result)                - Error code, zero for successful allocation
-;       *startBlock     - Actual starting allocation block
-;       *actualBlocks   - Actual number of allocation blocks allocated
-;
-; Side effects:
-;       The volume bitmap is read and updated; the volume bitmap cache may be changed.
-;________________________________________________________________________________
-*/
+ * Validation Routine to verify that the TRIM list maintained by the journal
+ * is in good shape relative to what we think the bitmap should have.  We should
+ * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
+ * we panic.  
+ */
+int trim_validate_bitmap (struct hfsmount *hfsmp);
+int trim_validate_bitmap (struct hfsmount *hfsmp) {
+       u_int64_t blockno_offset;
+       u_int64_t numblocks;
+       int i;
+       int count;
+       u_int32_t startblk;
+       u_int32_t blks;
+       int err = 0;
+       uint32_t alloccount = 0;
 
-__private_extern__
-OSErr BlockAllocate (
-       ExtendedVCB             *vcb,                           /* which volume to allocate space on */
-       UInt32                  startingBlock,          /* preferred starting block, or 0 for no preference */
-       UInt32                  minBlocks,              /* desired number of blocks to allocate */
-       UInt32                  maxBlocks,              /* maximum number of blocks to allocate */
-       Boolean                 forceContiguous,        /* non-zero to force contiguous allocation and to force */
-                                                       /* minBlocks bytes to actually be allocated */
-                                                       
-       Boolean useMetaZone,
-       UInt32                  *actualStartBlock,      /* actual first block of allocation */
-       UInt32                  *actualNumBlocks)       /* number of blocks actually allocated; if forceContiguous */
-                                                       /* was zero, then this may represent fewer than minBlocks */
+       if (hfsmp->jnl) {
+               struct journal *jnl = (struct journal*)hfsmp->jnl;
+               if (jnl->active_tr) {
+                       struct jnl_trim_list *trim = &(jnl->active_tr->trim);
+                       count = trim->extent_count;
+                       for (i = 0; i < count; i++) {
+                               blockno_offset = trim->extents[i].offset;
+                               blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
+                               blockno_offset = blockno_offset / hfsmp->blockSize;
+                               numblocks = trim->extents[i].length / hfsmp->blockSize;
+
+                               startblk = (u_int32_t)blockno_offset;
+                               blks = (u_int32_t) numblocks;
+                               err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
+
+                               if (err == 0 && alloccount != 0) {
+                                       panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
+                               }
+                       }
+               }
+       }
+       return 0;
+}
+
+#endif
+
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_unmap_free_extent
+ ;
+ ; Function:           Make note of a range of allocation blocks that should be
+ ;                             unmapped (trimmed).  That is, the given range of blocks no
+ ;                             longer have useful content, and the device can unmap the
+ ;                             previous contents.  For example, a solid state disk may reuse
+ ;                             the underlying storage for other blocks.
+ ;
+ ;                             This routine is only supported for journaled volumes.  The extent
+ ;                             being freed is passed to the journal code, and the extent will
+ ;                             be unmapped after the current transaction is written to disk.
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;     startingBlock   - The first allocation block of the extent being freed.
+ ;     numBlocks               - The number of allocation blocks of the extent being freed.
+ ;________________________________________________________________________________
+ */
+static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
 {
-       UInt32  freeBlocks;
-       OSErr                   err;
-       Boolean                 updateAllocPtr = false;         //      true if nextAllocation needs to be updated
+       u_int64_t offset;
+       u_int64_t length;
+       u_int64_t device_sz;
+       int err = 0;
 
-       //
-       //      Initialize outputs in case we get an error
-       //
-       *actualStartBlock = 0;
-       *actualNumBlocks = 0;
-       freeBlocks = hfs_freeblks(VCBTOHFS(vcb), 0);
-       
-       //
-       //      If the disk is already full, don't bother.
-       //
-       if (freeBlocks == 0) {
-               err = dskFulErr;
-               goto Exit;
-       }
-       if (forceContiguous && freeBlocks < minBlocks) {
-               err = dskFulErr;
-               goto Exit;
-       }
-       /*
-        * Clip if necessary so we don't over-subscribe the free blocks.
-        */
-       if (minBlocks > freeBlocks) {
-               minBlocks = freeBlocks;
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
+
+       if (ALLOC_DEBUG) {
+               if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+                       panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
+               }
        }
-       if (maxBlocks > freeBlocks) {
-               maxBlocks = freeBlocks;
+
+       if (hfsmp->jnl != NULL) {
+               device_sz = hfsmp->hfs_logical_bytes;
+               offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+               length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+               /* Validate that the trim is in a valid range of bytes */
+               if ((offset >= device_sz) || ((offset + length) > device_sz)) {
+                       printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
+                       err = EINVAL;
+               }
+
+               if (err == 0) {
+                       err = journal_trim_add_extent(hfsmp->jnl, offset, length);
+                       if (err) {
+                               printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
+                       }
+               }
        }
 
-       //
-       //      If caller didn't specify a starting block number, then use the volume's
-       //      next block to allocate from.
-       //
-       if (startingBlock == 0) {
-               HFS_MOUNT_LOCK(vcb, TRUE);
-               startingBlock = vcb->nextAllocation;
-               HFS_MOUNT_UNLOCK(vcb, TRUE);
-               updateAllocPtr = true;
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
+}
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_track_unmap_blocks
+ ;
+ ; Function:   Make note of a range of allocation blocks that should be
+ ;                             unmapped (trimmed).  That is, the given range of blocks no
+ ;                             longer have useful content, and the device can unmap the
+ ;                             previous contents.  For example, a solid state disk may reuse
+ ;                             the underlying storage for other blocks.
+ ;
+ ;                             This routine is only supported for journaled volumes.  
+ ; 
+ ;              *****NOTE*****: 
+ ;              This function should *NOT* be used when the volume is fully 
+ ;              mounted.  This function is intended to support a bitmap iteration
+ ;              at mount time to fully inform the SSD driver of the state of all blocks
+ ;              at mount time, and assumes that there is no allocation/deallocation
+ ;              interference during its iteration.,
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;     offset          - The first allocation block of the extent being freed.
+ ;     numBlocks               - The number of allocation blocks of the extent being freed.
+ ;  list            - The list of currently tracked trim ranges.
+ ;________________________________________________________________________________
+ */
+static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, 
+               u_int32_t numBlocks, struct jnl_trim_list *list) {
+
+       u_int64_t offset;
+       u_int64_t length;
+       int error = 0;
+
+       if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) {
+               int extent_no = list->extent_count;
+               offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+               length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+
+               list->extents[extent_no].offset = offset;
+               list->extents[extent_no].length = length;
+               list->extent_count++;
+               if (list->extent_count == list->allocated_count) {
+                       error = hfs_issue_unmap (hfsmp, list);
+               }
        }
-       if (startingBlock >= vcb->totalBlocks) {
-               startingBlock = 0; /* overflow so start at beginning */
+
+       return error;
+}
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_issue_unmap
+ ;
+ ; Function:   Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;  list            - The list of currently tracked trim ranges.
+ ;________________________________________________________________________________
+ */
+
+static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) 
+{
+       dk_unmap_t unmap;
+       int error = 0;
+
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
        }
 
-       //
-       //      If the request must be contiguous, then find a sequence of free blocks
-       //      that is long enough.  Otherwise, find the first free block.
-       //
-       if (forceContiguous) {
-               err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
-                                         useMetaZone, actualStartBlock, actualNumBlocks);
-               /*
-                * If we allocated from a new position then
-                * also update the roving allocatior.
-                */
-               if ((err == noErr) &&
-                   (*actualStartBlock > startingBlock) &&
-                   ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
-                    (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
-                       HFS_MOUNT_LOCK(vcb, TRUE);
-                       vcb->nextAllocation = *actualStartBlock;
-                       HFS_MOUNT_UNLOCK(vcb, TRUE);
+       if (list->extent_count > 0 && list->extents != NULL) {
+               bzero(&unmap, sizeof(unmap));
+               unmap.extents = list->extents;
+               unmap.extentsCount = list->extent_count;
+
+               if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+                       KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
                }
-       } else {
-               /*
-                * Scan the bitmap once, gather the N largest free extents, then
-                * allocate from these largest extents.  Repeat as needed until
-                * we get all the space we needed.  We could probably build up
-                * that list when the higher level caller tried (and failed) a
-                * contiguous allocation first.
+       
+#if CONFIG_PROTECT
+               /* 
+                * If we have not yet completed the first scan through the bitmap, then
+                * optionally inform the block driver below us that this is an initialization
+                * TRIM scan, if it can deal with this information.
                 */
-               err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
-               if (err == dskFulErr)
-                       err = BlockAllocateAny(vcb, startingBlock, vcb->totalBlocks,
-                                              maxBlocks, useMetaZone, actualStartBlock,
-                                              actualNumBlocks);
-               if (err == dskFulErr)
-                       err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
-                                              useMetaZone, actualStartBlock,
-                                              actualNumBlocks);
+               if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
+                       unmap.options |= _DK_UNMAP_INITIALIZE;  
+               }
+#endif
+               /* Issue a TRIM and flush them out */
+               error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
+
+               bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
+               bzero (&unmap, sizeof(unmap));
+               list->extent_count = 0;
        }
 
-Exit:
-       // if we actually allocated something then go update the
-       // various bits of state that we maintain regardless of
-       // whether there was an error (i.e. partial allocations
-       // still need to update things like the free block count).
-       //
-       if (*actualNumBlocks != 0) {
-               //
-               //      If we used the volume's roving allocation pointer, then we need to update it.
-               //      Adding in the length of the current allocation might reduce the next allocate
-               //      call by avoiding a re-scan of the already allocated space.  However, the clump
-               //      just allocated can quite conceivably end up being truncated or released when
-               //      the file is closed or its EOF changed.  Leaving the allocation pointer at the
-               //      start of the last allocation will avoid unnecessary fragmentation in this case.
-               //
-               HFS_MOUNT_LOCK(vcb, TRUE);
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
+       }
 
-               if (updateAllocPtr &&
-                   ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
-                    (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
-                       vcb->nextAllocation = *actualStartBlock;
-               }
-               //
-               //      Update the number of free blocks on the volume
-               //
-               vcb->freeBlocks -= *actualNumBlocks;
-               MarkVCBDirty(vcb);
-               HFS_MOUNT_UNLOCK(vcb, TRUE);
+       return error;
+}
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            hfs_unmap_alloc_extent
+ ;
+ ; Function:           Make note of a range of allocation blocks, some of
+ ;                             which may have previously been passed to hfs_unmap_free_extent,
+ ;                             is now in use on the volume.  The given blocks will be removed
+ ;                             from any pending DKIOCUNMAP.
+ ;
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;     startingBlock   - The first allocation block of the extent being allocated.
+ ;     numBlocks               - The number of allocation blocks being allocated.
+ ;________________________________________________________________________________
+ */
+
+static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+       u_int64_t offset;
+       u_int64_t length;
+       int err = 0;
+
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
 
-               hfs_generate_volume_notifications(VCBTOHFS(vcb));
+       if (hfsmp->jnl != NULL) {
+               offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+               length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+               err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
+               if (err) {
+                       printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
+               }
        }
-       
-       return err;
+
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
 }
 
 
 /*
 ;________________________________________________________________________________
 ;
-; Routine:        BlkDealloc
+; Routine:             hfs_trim_callback
 ;
-; Function:    Update the bitmap to deallocate a run of disk allocation blocks
+; Function:            This function is called when a transaction that freed extents
+;                              (via hfs_unmap_free_extent/journal_trim_add_extent) has been
+;                              written to the on-disk journal.  This routine will add those
+;                              extents to the free extent cache so that they can be reused.
 ;
-; Input Arguments:
-;       vcb            - Pointer to ExtendedVCB for the volume to free space on
-;       firstBlock     - First allocation block to be freed
-;       numBlocks      - Number of allocation blocks to free up (must be > 0!)
+;                              CAUTION: This routine is called while the journal's trim lock
+;                              is held shared, so that no other thread can reuse any portion
+;                              of those extents.  We must be very careful about which locks
+;                              we take from within this callback, to avoid deadlock.  The
+;                              call to add_free_extent_cache will end up taking the cache's
+;                              lock (just long enough to add these extents to the cache).
 ;
-; Output:
-;       (result)       - Result code
+;                              CAUTION: If the journal becomes invalid (eg., due to an I/O
+;                              error when trying to write to the journal), this callback
+;                              will stop getting called, even if extents got freed before
+;                              the journal became invalid!
 ;
-; Side effects:
-;       The volume bitmap is read and updated; the volume bitmap cache may be changed.
+; Input Arguments:
+;      arg                             - The hfsmount of the volume containing the extents.
+;      extent_count    - The number of extents freed in the transaction.
+;      extents                 - An array of extents (byte ranges) that were freed.
 ;________________________________________________________________________________
 */
 
-__private_extern__
-OSErr BlockDeallocate (
-       ExtendedVCB             *vcb,                   //      Which volume to deallocate space on
-       UInt32                  firstBlock,             //      First block in range to deallocate
-       UInt32                  numBlocks)              //      Number of contiguous blocks to deallocate
+__private_extern__ void
+hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
 {
-       OSErr                   err;
-       
-       //
-       //      If no blocks to deallocate, then exit early
-       //
-       if (numBlocks == 0) {
-               err = noErr;
-               goto Exit;
+       uint32_t i;
+       uint32_t startBlock, numBlocks;
+       struct hfsmount *hfsmp = arg;
+
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
+
+       for (i=0; i<extent_count; ++i) {
+               /* Convert the byte range in *extents back to a range of allocation blocks. */
+               startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
+               numBlocks = extents[i].length / hfsmp->blockSize;
+               (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
        }
 
-       //
-       //      Call internal routine to free the sequence of blocks
-       //
-       err = BlockMarkFree(vcb, firstBlock, numBlocks);
-       if (err)
-               goto Exit;
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
 
-       //
-       //      Update the volume's free block count, and mark the VCB as dirty.
-       //
-       HFS_MOUNT_LOCK(vcb, TRUE);
-       vcb->freeBlocks += numBlocks;
-       if (vcb->nextAllocation == (firstBlock + numBlocks))
-               vcb->nextAllocation -= numBlocks;
-       MarkVCBDirty(vcb);
-       HFS_MOUNT_UNLOCK(vcb, TRUE); 
 
-       hfs_generate_volume_notifications(VCBTOHFS(vcb));
-Exit:
+/*
+   ;________________________________________________________________________________
+   ;
+   ; Routine:          CheckUnmappedBytes
+   ;
+   ; Function: From the specified inputs, determine if the extent in question overlaps 
+   ;                           space that was recently freed, where the recently freed space may still be
+   ;                           lingering in an uncommitted journal transaction.  This may have data safety 
+   ;                           implications.  The intended use is to decide whether or not to force a journal flush
+   ;                           before allowing file data I/O to be issued.  If we did not do this
+   ;                           then it would be possible to issue the file I/O ahead of the
+   ;                           journal, resulting in data being overwritten if the transaction either
+   ;                           is not committed or cannot be replayed.
+   ;
+   ;           NOTE: This function assumes that the journal and catalog/extent locks are held.
+   ;
+   ; Input Arguments:
+   ;   hfsmp                   - The volume containing the allocation blocks.
+   ;   foffset                 - start of the extent in question (in allocation blocks)
+   ;   numbytes                - number of blocks in the extent.
+   ;  recently_freed:  - output pointer containing whether or not the blocks were freed recently
+   ;  overlap_end              - end of the overlap between the argument extent and the trim list (in allocation blocks)
+   ;
+   ; Output:
+   ;
+   ;           Returns 0 if we could determine extent validity for this (or a previous transaction)
+   ;           Returns errno if there was an error
+   ;
+   ;           If returned 0, then recently freed will contain a boolean that indicates
+   ;           that it was recently freed.
+   ;________________________________________________________________________________
+ */
+
+u_int32_t
+CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
+       uint64_t device_offset;
+       uint64_t numbytes;
+       uint32_t err = 0;
+       uint64_t lba_overlap_end;
 
+       if (hfsmp->jnl != NULL) {
+               /*
+                * Convert the allocation block # and the number of blocks into device-relative
+                * offsets so that they can be compared using the TRIM list.
+                */
+               uint64_t device_sz = hfsmp->hfs_logical_bytes;
+               device_offset = blockno * ((uint64_t)hfsmp->blockSize);
+               device_offset += hfsmp->hfsPlusIOPosOffset;
+               numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
+
+               /* 
+                * Since we check that the device_offset isn't too large, it's safe to subtract it
+                * from the size in the second check.
+                */
+               if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
+                       return EINVAL;
+               }
+
+               /* Ask the journal if this extent overlaps with any pending TRIMs */
+               if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
+                       *recently_freed = 1;
+
+                       /* Convert lba_overlap_end back into allocation blocks */
+                       uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
+                       end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
+                       *overlap_end = (uint32_t) end_offset;
+               }
+               else {
+                       *recently_freed = 0;
+               }
+               err = 0;
+       }
+       else {
+               /* There may not be a journal.  In that case, always return success.  */
+               *recently_freed = 0;
+       }
        return err;
+
 }
 
 
-UInt8 freebitcount[16] = {
-       4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
-       3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
-};
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:            ScanUnmapBlocks
+ ;
+ ; Function:   Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
+ ;                             device as needed so that the underlying disk device is as
+ ;                             up-to-date as possible with which blocks are unmapped.
+ ;                             Additionally build up the summary table as needed.
+ ;
+ ;                             This function reads the bitmap in large block size 
+ ;                             (up to 1MB) unlink the runtime which reads the bitmap 
+ ;                             in 4K block size.  So if this function is being called 
+ ;                             after the volume is mounted and actively modified, the 
+ ;                             caller needs to invalidate all of the existing buffers 
+ ;                             associated with the bitmap vnode before calling this 
+ ;                             function.  If the buffers are not invalidated, it can 
+ ;                             cause but_t collision and potential data corruption.
+ ;  
+ ; Input Arguments:
+ ;     hfsmp                   - The volume containing the allocation blocks.
+ ;________________________________________________________________________________
+ */
 
 __private_extern__
-UInt32
-MetaZoneFreeBlocks(ExtendedVCB *vcb)
+u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) 
 {
-       UInt32 freeblocks;
-       UInt32 *currCache;
-       UInt32 blockRef;
-       UInt32 bit;
-       UInt32 lastbit;
-       int bytesleft;
-       int bytesperblock;
-       UInt8 byte;
-       UInt8 *buffer;
+       u_int32_t blocks_scanned = 0;
+       int error = 0;
+       struct jnl_trim_list trimlist;
 
-       blockRef = 0;
-       bytesleft = freeblocks = 0;
-       buffer = NULL;
-       bit = VCBTOHFS(vcb)->hfs_metazone_start;
-       if (bit == 1)
-               bit = 0;
-       
-       lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
-       bytesperblock = vcb->vcbVBMIOSize;
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
+       }
 
        /*
-        *  Count all the bits from bit to lastbit.
+        *struct jnl_trim_list {
+        uint32_t    allocated_count;
+        uint32_t    extent_count;
+        dk_extent_t *extents;
+        };
         */
-       while (bit < lastbit) {
-               /*
-                *  Get next bitmap block.
-                */
-               if (bytesleft == 0) {
-                       if (blockRef) {
-                               (void) ReleaseBitmapBlock(vcb, blockRef, false);
-                               blockRef = 0;
-                       }
-                       if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
-                               return (0);
-                       }
-                       buffer = (UInt8 *)currCache;
-                       bytesleft = bytesperblock;
+       bzero (&trimlist, sizeof(trimlist));
+
+       /* 
+        * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
+        * which is now enabled for most architectures.  Instead, any trim related 
+        * work should be tied to whether the underlying storage media supports 
+        * UNMAP, as any solid state device would on desktop or embedded.
+        * 
+        * We do this because we may want to scan the full bitmap on desktop
+        * for spinning media for the purposes of building up the 
+        * summary table. 
+        * 
+        * We also avoid sending TRIMs down to the underlying media if the mount is read-only.
+        */
+
+       if ((hfsmp->hfs_flags & HFS_UNMAP) && 
+                       ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
+               /* If the underlying device supports unmap and the mount is read-write, initialize */
+               int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
+               void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
+               if (extents == NULL) {
+                       return ENOMEM;
                }
-               byte = *buffer++;
-               freeblocks += freebitcount[byte & 0x0F];
-               freeblocks += freebitcount[(byte >> 4) & 0x0F];
-               bit += kBitsPerByte;
-               --bytesleft;
+               trimlist.extents = (dk_extent_t*)extents;
+               trimlist.allocated_count = alloc_count;
+               trimlist.extent_count = 0;
        }
-       if (blockRef)
-               (void) ReleaseBitmapBlock(vcb, blockRef, false);
 
-       return (freeblocks);
-}
+       while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
 
+               error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
 
-/*
- * Obtain the next allocation block (bit) that's
- * outside the metadata allocation zone.
- */
-static UInt32 NextBitmapBlock(
-       ExtendedVCB             *vcb,
-       UInt32                  bit)
-{
-       struct  hfsmount *hfsmp = VCBTOHFS(vcb);
+               if (error) {
+                       printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
+                       break;
+               }
+       }
 
-       if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
-               return (bit);
-       /*
-        * Skip over metadata allocation zone.
+       if ((hfsmp->hfs_flags & HFS_UNMAP) && 
+                       ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
+               if (error == 0) {
+                       hfs_issue_unmap(hfsmp, &trimlist);
+               }
+               if (trimlist.extents) {
+                       kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
+               }
+       }
+
+       /* 
+        * This is in an #if block because hfs_validate_summary prototype and function body
+        * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
         */
-       if ((bit >= hfsmp->hfs_metazone_start) &&
-           (bit <= hfsmp->hfs_metazone_end)) {
-               bit = hfsmp->hfs_metazone_end + 1;
+#if ALLOC_DEBUG
+       sanity_check_free_ext(hfsmp, 1);
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               /* Validate the summary table too! */
+               hfs_validate_summary(hfsmp);
+               printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
        }
-       return (bit);
-}
+#endif
 
+       if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
+       }
 
-/*
-;_______________________________________________________________________
-;
-; Routine:     ReadBitmapBlock
-;
-; Function:    Read in a bitmap block corresponding to a given allocation
-;                      block (bit).  Return a pointer to the bitmap block.
-;
-; Inputs:
-;      vcb                     --      Pointer to ExtendedVCB
-;      bit                     --      Allocation block whose bitmap block is desired
-;
-; Outputs:
-;      buffer          --      Pointer to bitmap block corresonding to "block"
-;      blockRef
-;_______________________________________________________________________
-*/
-static OSErr ReadBitmapBlock(
-       ExtendedVCB             *vcb,
-       UInt32                  bit,
-       UInt32                  **buffer,
-       UInt32                  *blockRef)
+       return error;
+}
+
+static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start, 
+                                                                uint32_t count, int list, 
+                                                                struct rl_entry **reservation)
 {
-       OSErr                   err;
-       struct buf *bp = NULL;
-       struct vnode *vp = NULL;
-       daddr64_t block;
-       UInt32 blockSize;
+       struct rl_entry *range, *next_range;
+
+       if (list == HFS_TENTATIVE_BLOCKS) {
+               int nranges = 0;
+               // Don't allow more than 4 tentative reservations
+               TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
+                                                  rl_link, next_range) {
+                       if (++nranges > 3)
+                               hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+               }
+       }
 
-       /*
-        * volume bitmap blocks are protected by the allocation file lock
-        */
-       REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);       
+       MALLOC(range, struct rl_entry *, sizeof(*range), M_TEMP, M_WAITOK);
+       range->rl_start = start;
+       range->rl_end = start + count - 1;
+       TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
+       *reservation = range;
+}
 
-       blockSize = (UInt32)vcb->vcbVBMIOSize;
-       block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
+static void hfs_release_reserved(hfsmount_t *hfsmp,
+                                                                struct rl_entry *range,
+                                                                int list)
+{
+       if (range->rl_start == -1)
+               return;
+
+       TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
+
+       if (rl_len(range) > 0) {
+               if (list == HFS_TENTATIVE_BLOCKS)
+                       hfsmp->tentativeBlocks -= rl_len(range);
+               else {
+                       /*
+                        * We don't need to unmap tentative blocks because we won't have
+                        * written to them, but we might have written to reserved blocks.
+                        * Nothing can refer to those blocks so this doesn't have to be
+                        * via the journal. If this proves to be too expensive, we could
+                        * consider not sending down the unmap or we could require this
+                        * to always be called within a transaction and then we can use
+                        * the journal.
+                        */
+                       dk_extent_t extent = {
+                               .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize)
+                                                  + hfsmp->hfsPlusIOPosOffset),
+                               .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize)
+                       };
+                       dk_unmap_t unmap = {
+                               .extents = &extent,
+                               .extentsCount = 1,
+                       };
+                       VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap,
+                                          0, vfs_context_kernel());
+                       assert(hfsmp->lockedBlocks >= rl_len(range));
+                       hfsmp->lockedBlocks -= rl_len(range);
+               }
+               hfs_release_summary(hfsmp, range->rl_start, rl_len(range));
+               add_free_extent_cache(hfsmp, range->rl_start, rl_len(range));
+       }
 
-       if (vcb->vcbSigWord == kHFSPlusSigWord) {
-               vp = vcb->hfs_allocation_vp;    /* use allocation file vnode */
+       range->rl_start = -1;
+       range->rl_end   = -2;
+}
 
-       } else /* hfs */ {
-               vp = VCBTOHFS(vcb)->hfs_devvp;  /* use device I/O vnode */
-               block += vcb->vcbVBMSt;                 /* map to physical block */
+static void hfs_free_locked_internal(hfsmount_t *hfsmp,
+                                                                          struct rl_entry **reservation,
+                                                                          int list)
+{
+       if (*reservation) {
+               hfs_release_reserved(hfsmp, *reservation, list);
+               FREE(*reservation, M_TEMP);
+               *reservation = NULL;
        }
+}
 
-       err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
+void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
+{
+       hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
+}
 
-       if (bp) {
-               if (err) {
-                       buf_brelse(bp);
-                       *blockRef = NULL;
-                       *buffer = NULL;
-               } else {
-                       *blockRef = (UInt32)bp;
-                       *buffer = (UInt32 *)buf_dataptr(bp);
-               }
-       }
+void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
+{
+       hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
+}
+
+OSErr BlockAllocate (
+                hfsmount_t             *hfsmp,                         /* which volume to allocate space on */
+                u_int32_t              startingBlock,          /* preferred starting block, or 0 for no preference */
+                u_int32_t              minBlocks,              /* desired number of blocks to allocate */
+                u_int32_t              maxBlocks,              /* maximum number of blocks to allocate */
+                hfs_block_alloc_flags_t flags,                 /* option flags */
+                u_int32_t              *actualStartBlock,      /* actual first block of allocation */
+                u_int32_t              *actualNumBlocks)
+{
+       hfs_alloc_extra_args_t extra_args = {
+               .max_blocks = maxBlocks
+       };
+
+       HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
+
+       OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
+
+       *actualStartBlock = extent.startBlock;
+       *actualNumBlocks  = extent.blockCount;
 
        return err;
 }
 
+errno_t hfs_block_alloc(hfsmount_t *hfsmp,
+                                               HFSPlusExtentDescriptor *extent,
+                                               hfs_block_alloc_flags_t flags,
+                                               hfs_alloc_extra_args_t *ap)
+{
+       return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
+}
 
 /*
-;_______________________________________________________________________
-;
-; Routine:     ReleaseBitmapBlock
-;
-; Function:    Relase a bitmap block. 
-;
-; Inputs:
-;      vcb
-;      blockRef
-;      dirty
-;_______________________________________________________________________
-*/
-static OSErr ReleaseBitmapBlock(
-       ExtendedVCB             *vcb,
-       UInt32                  blockRef,
-       Boolean                 dirty)
+ ;________________________________________________________________________________
+ ;
+ ; Routine:       hfs_block_alloc_int
+ ;
+ ; Function:   Allocate space on a volume.     If contiguous allocation is requested,
+ ;                        at least the requested number of bytes will be allocated or an
+ ;                        error will be returned.      If contiguous allocation is not forced,
+ ;                        the space will be allocated with the first largest extent available 
+ ;                        at the requested starting allocation block.  If there is not enough
+ ;                        room there, a block allocation of less than the requested size will be
+ ;                        allocated.
+ ;
+ ;                        If the requested starting block is 0 (for new file allocations),
+ ;                        the volume's allocation block pointer will be used as a starting
+ ;                        point.
+ ;
+ ; Input Arguments:
+ ;   hfsmp           - Pointer to the HFS mount structure.
+ ;   extent          - startBlock indicates the block to start
+ ;                     searching from and blockCount is the number of
+ ;                     blocks required.  Depending on the flags used,
+ ;                     more or less blocks may be returned.  The
+ ;                     allocated extent is returned via this
+ ;                     parameter.
+ ;   flags           - Flags to specify options like contiguous, use
+ ;                     metadata zone, skip free block check, etc.
+ ;   ap              - Additional arguments used depending on flags.
+ ;                     See hfs_alloc_extra_args_t and below.
+ ;
+ ; Output:
+ ;   (result)        - Error code, zero for successful allocation
+ ;   extent          - If successful, the allocated extent.
+ ;
+ ; Side effects:
+ ;      The volume bitmap is read and updated; the volume bitmap cache may be changed.
+ ;
+ ; HFS_ALLOC_TENTATIVE
+ ; Blocks will be reserved but not marked allocated.  They can be
+ ; stolen if free space is limited.  Tentative blocks can be used by
+ ; passing HFS_ALLOC_USE_TENTATIVE and passing in the resevation.
+ ; @ap->reservation_out is used to store the reservation.
+ ;
+ ; HFS_ALLOC_USE_TENTATIVE
+ ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
+ ; @ap->reservation_in should be set to whatever @ap->reservation_out
+ ; was set to when HFS_ALLOC_TENTATIVE was used.  If the tentative
+ ; reservation was stolen, a normal allocation will take place.
+ ;
+ ; HFS_ALLOC_LOCKED
+ ; Blocks will be reserved but not marked allocated.  Unlike tentative
+ ; reservations they cannot be stolen.  It is safe to write to these
+ ; blocks.  @ap->reservation_out is used to store the reservation.
+ ;
+ ; HFS_ALLOC_COMMIT
+ ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
+ ; mark them allocated on disk.  @ap->reservation_in is used.
+ ;
+ ; HFS_ALLOC_ROLL_BACK
+ ; Take blocks that were just recently deallocated and mark them
+ ; allocated.  This is for roll back situations.  Blocks got
+ ; deallocated and then something went wrong and we need to roll back
+ ; by marking the blocks allocated.
+ ;
+ ; HFS_ALLOC_FORCECONTIG
+ ; It will not return fewer than @min_blocks.
+ ;
+ ; HFS_ALLOC_TRY_HARD
+ ; We will perform an exhaustive search to try and find @max_blocks.
+ ; It will not return fewer than @min_blocks.
+ ;
+ ;________________________________________________________________________________
+ */
+OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
+                                                 HFSPlusExtentDescriptor *extent,
+                                                 hfs_block_alloc_flags_t flags,
+                                                 hfs_alloc_extra_args_t *ap)
 {
-       struct buf *bp = (struct buf *)blockRef;
-       
-       if (blockRef == 0) {
-               if (dirty)
-                       panic("ReleaseBitmapBlock: missing bp");
-               return (0);
-       }
+       u_int32_t  freeBlocks;
+       OSErr                   err = 0;
+       Boolean                 updateAllocPtr = false;         //      true if nextAllocation needs to be updated
+       Boolean useMetaZone;
+       Boolean forceContiguous = false;
+       Boolean forceFlush;
 
-       if (bp) {
-               if (dirty) {
-                       // XXXdbg
-                       struct hfsmount *hfsmp = VCBTOHFS(vcb);
-                       
-                       if (hfsmp->jnl) {
-                               journal_modify_block_end(hfsmp->jnl, bp);
-                       } else {
-                               buf_bdwrite(bp);
-                       }
-               } else {
-                       buf_brelse(bp);
-               }
-       }
+       uint32_t startingBlock = extent->startBlock;
+       uint32_t minBlocks = extent->blockCount;
+       uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;
 
-       return (0);
-}
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
 
+       if (ISSET(flags, HFS_ALLOC_COMMIT)) {
+               extent->startBlock = (*ap->reservation_in)->rl_start;
+               extent->blockCount = rl_len(*ap->reservation_in);
+               goto mark_allocated;
+       }
 
-/*
-_______________________________________________________________________
+       if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
+               goto mark_allocated;
 
-Routine:       BlockAllocateContig
+       freeBlocks = hfs_freeblks(hfsmp, 0);
 
-Function:      Allocate a contiguous group of allocation blocks.  The
-                       allocation is all-or-nothing.  The caller guarantees that
-                       there are enough free blocks (though they may not be
-                       contiguous, in which case this call will fail).
+       if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
+               struct rl_entry *range = *ap->reservation_in;
 
-Inputs:
-       vcb                             Pointer to volume where space is to be allocated
-       startingBlock   Preferred first block for allocation
-       minBlocks               Minimum number of contiguous blocks to allocate
-       maxBlocks               Maximum number of contiguous blocks to allocate
-       useMetaZone
+               if (range && range->rl_start != -1) {
+                       /*
+                        * It's possible that we have a tentative reservation
+                        * but there aren't enough free blocks due to loaned blocks
+                        * or insufficient space in the backing store.
+                        */
+                       uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks);
 
-Outputs:
-       actualStartBlock        First block of range allocated, or 0 if error
-       actualNumBlocks         Number of blocks allocated, or 0 if error
-_______________________________________________________________________
-*/
-static OSErr BlockAllocateContig(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       UInt32                  minBlocks,
-       UInt32                  maxBlocks,
-       Boolean                 useMetaZone,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks)
-{
-       OSErr   err;
+                       if (count >= minBlocks) {
+                               extent->startBlock = range->rl_start;
+                               extent->blockCount = count;
 
-       //
-       //      Find a contiguous group of blocks at least minBlocks long.
-       //      Determine the number of contiguous blocks available (up
-       //      to maxBlocks).
-       //
+                               // Should we go straight to commit?
+                               if (!ISSET(flags, HFS_ALLOC_LOCKED))
+                                       SET(flags, HFS_ALLOC_COMMIT);
+
+                               goto mark_allocated;
+                       }
+               }
 
-       /*
-        * NOTE: If the only contiguous free extent of at least minBlocks
-        * crosses startingBlock (i.e. starts before, ends after), then we
-        * won't find it. Earlier versions *did* find this case by letting
-        * the second search look past startingBlock by minBlocks.  But
-        * with the free extent cache, this can lead to duplicate entries
-        * in the cache, causing the same blocks to be allocated twice.
-        */
-       err = BlockFindContiguous(vcb, startingBlock, vcb->totalBlocks, minBlocks,
-                                 maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
-       if (err == dskFulErr && startingBlock != 0) {
                /*
-                * Constrain the endingBlock so we don't bother looking for ranges
-                * that would overlap those found in the previous call.
+                * We can't use the tentative reservation so free it and allocate
+                * normally.
                 */
-               err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
-                                         useMetaZone, actualStartBlock, actualNumBlocks);
+               hfs_free_tentative(hfsmp, ap->reservation_in);
+               CLR(flags, HFS_ALLOC_USE_TENTATIVE);
        }
-       if (err != noErr) goto Exit;
 
-       // sanity check
-       if ((*actualStartBlock + *actualNumBlocks) > vcb->totalBlocks)
-               panic("BlockAllocateContig: allocation overflow on \"%s\"", vcb->vcbVN);
+       if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
+               forceContiguous = true;
 
-       //
-       //      Now mark those blocks allocated.
-       //
-       err = BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks);
-       
-Exit:
-       if (err != noErr) {
-               *actualStartBlock = 0;
-               *actualNumBlocks = 0;
+       if (flags & HFS_ALLOC_METAZONE) {
+               useMetaZone = true;
+       } else {
+               useMetaZone = false;
        }
-       
-       return err;
-}
-
-/*
-_______________________________________________________________________
 
-Routine:       BlockAllocateAny
+       if (flags & HFS_ALLOC_FLUSHTXN) {
+               forceFlush = true;
+       }
+       else {
+               forceFlush = false;
+       }
 
-Function:      Allocate one or more allocation blocks.  If there are fewer
-                       free blocks than requested, all free blocks will be
-                       allocated.  The caller guarantees that there is at least
-                       one free block.
+       assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);
+
+       // See if we have to steal tentative blocks
+       if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
+               SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);
+
+       /* Skip free block check if blocks are being allocated for relocating 
+        * data during truncating a volume.
+        * 
+        * During hfs_truncatefs(), the volume free block count is updated 
+        * before relocating data to reflect the total number of free blocks 
+        * that will exist on the volume after resize is successful.  This 
+        * means that we have reserved allocation blocks required for relocating 
+        * the data and hence there is no need to check the free blocks.
+        * It will also prevent resize failure when the number of blocks in 
+        * an extent being relocated is more than the free blocks that will 
+        * exist after the volume is resized.
+        */
+       if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+               //      If the disk is already full, don't bother.
+               if (freeBlocks == 0) {
+                       err = dskFulErr;
+                       goto exit;
+               }
+               if (forceContiguous && freeBlocks < minBlocks) {
+                       err = dskFulErr;
+                       goto exit;
+               }
 
-Inputs:
-       vcb                             Pointer to volume where space is to be allocated
-       startingBlock   Preferred first block for allocation
-       endingBlock             Last block to check + 1
-       maxBlocks               Maximum number of contiguous blocks to allocate
-       useMetaZone
+               /*
+                * Clip if necessary so we don't over-subscribe the free blocks.
+                */
+               if (minBlocks > freeBlocks) {
+                       minBlocks = freeBlocks;
+               }
+               if (maxBlocks > freeBlocks) {
+                       maxBlocks = freeBlocks;
+               }
+       }
 
-Outputs:
-       actualStartBlock        First block of range allocated, or 0 if error
-       actualNumBlocks         Number of blocks allocated, or 0 if error
-_______________________________________________________________________
-*/
-static OSErr BlockAllocateAny(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       register UInt32 endingBlock,
-       UInt32                  maxBlocks,
-       Boolean                 useMetaZone,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks)
-{
-       OSErr                   err;
-       register UInt32 block;                  //      current block number
-       register UInt32 currentWord;    //      Pointer to current word within bitmap block
-       register UInt32 bitMask;                //      Word with given bits already set (ready to OR in)
-       register UInt32 wordsLeft;              //      Number of words left in this bitmap block
-       UInt32  *buffer = NULL;
-       UInt32  *currCache = NULL;
-       UInt32  blockRef;
-       UInt32  bitsPerBlock;
-       UInt32  wordsPerBlock;
-       Boolean dirty = false;
-       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+               err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
+               if (err)
+                       goto exit;
 
-       //      Since this routine doesn't wrap around
-       if (maxBlocks > (endingBlock - startingBlock)) {
-               maxBlocks = endingBlock - startingBlock;
+               goto mark_allocated;
        }
 
-       /*
-        * Skip over metadata blocks.
-        */
-       if (!useMetaZone)
-               startingBlock = NextBitmapBlock(vcb, startingBlock);
-
        //
-       //      Pre-read the first bitmap block
+       //      If caller didn't specify a starting block number, then use the volume's
+       //      next block to allocate from.
        //
-       err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
-       if (err != noErr) goto Exit;
-       buffer = currCache;
+       if (startingBlock == 0) {
+               hfs_lock_mount (hfsmp);
+
+               /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
+               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                       startingBlock = hfsmp->sparseAllocation;
+               } 
+               else {
+                       startingBlock = hfsmp->nextAllocation;
+               }
+               hfs_unlock_mount(hfsmp);
+               updateAllocPtr = true;
+       }
 
-       //
-       //      Set up the current position within the block
-       //
-       {
-               UInt32 wordIndexInBlock;
-               
-               bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
-               wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
 
-               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-               buffer += wordIndexInBlock;
-               wordsLeft = wordsPerBlock - wordIndexInBlock;
-               currentWord = SWAP_BE32 (*buffer);
-               bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
+       if (startingBlock >= hfsmp->allocLimit) {
+               startingBlock = 0; /* overflow so start at beginning */
        }
-       
+
        //
-       //      Find the first unallocated block
+       //      If the request must be contiguous, then find a sequence of free blocks
+       //      that is long enough.  Otherwise, find the first free block.
        //
-       block=startingBlock;
-       while (block < endingBlock) {
-               if ((currentWord & bitMask) == 0)
-                       break;
+       if (forceContiguous) {
+               err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
+                               flags, &extent->startBlock, &extent->blockCount);
+               /*
+                * If we allocated from a new position then also update the roving allocator.  
+                * This will keep the roving allocation pointer up-to-date even 
+                * if we are using the new R/B tree allocator, since
+                * it doesn't matter to us here, how the underlying allocator found 
+                * the block to vend out.
+                */
+               if ((err == noErr) &&
+                               (extent->startBlock > startingBlock) &&
+                               ((extent->startBlock < hfsmp->hfs_metazone_start) ||
+                                (extent->startBlock > hfsmp->hfs_metazone_end))) {
+                       updateAllocPtr = true;
+               }
+       } else {                                        
+               /*
+                * Scan the bitmap once, gather the N largest free extents, then
+                * allocate from these largest extents.  Repeat as needed until
+                * we get all the space we needed.  We could probably build up
+                * that list when the higher level caller tried (and failed) a
+                * contiguous allocation first.
+                *
+                * Note that the free-extent cache will be cease to be updated if
+                * we are using the red-black tree for allocations.  If we jettison 
+                * the tree, then we will reset the free-extent cache and start over.
+                */
 
-               //      Next bit
-               ++block;
-               bitMask >>= 1;
-               if (bitMask == 0) {
-                       //      Next word
-                       bitMask = kHighBitInWordMask;
-                       ++buffer;
+               /* Disable HFS_ALLOC_FLUSHTXN if needed */
+               if (forceFlush) {
+                       flags &= ~HFS_ALLOC_FLUSHTXN;
+               }
 
-                       if (--wordsLeft == 0) {
-                               //      Next block
-                               buffer = currCache = NULL;
-                               err = ReleaseBitmapBlock(vcb, blockRef, false);
-                               if (err != noErr) goto Exit;
+               /* 
+                * BlockFindKnown only examines the free extent cache; anything in there will
+                * have been committed to stable storage already.
+                */
+               err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
+                                                       &extent->blockCount);
+
+               /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */
+
+               if (err == dskFulErr) {
+                       /* 
+                        * Now we have to do a bigger scan.  Start at startingBlock and go up until the
+                        * allocation limit.  We 'trust' the summary bitmap in this call, if it tells us
+                        * that it could not find any free space.
+                        */
+                       err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
+                                       maxBlocks, flags, true, 
+                                       &extent->startBlock, &extent->blockCount);
+               }
+               if (err == dskFulErr) {
+                       /*
+                        * Vary the behavior here if the summary table is on or off.  
+                        * If it is on, then we don't trust it it if we get into this case and
+                        * basically do a full scan for maximum coverage.
+                        * If it is off, then we trust the above and go up until the startingBlock.
+                        */
+                       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                               err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
+                                               flags, false, 
+                                               &extent->startBlock, &extent->blockCount);
+                       }
+                       else {
+                               err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
+                                               flags, false, 
+                                               &extent->startBlock, &extent->blockCount);
+                       }       
+
+                       /*
+                    * Last Resort: Find/use blocks that may require a journal flush.
+                        */              
+                       if (err == dskFulErr && forceFlush) {
+                               flags |= HFS_ALLOC_FLUSHTXN;
+                               err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
+                                               flags, false, 
+                                               &extent->startBlock, &extent->blockCount);
+                       }
+               }
+       }
 
-                               /*
-                                * Skip over metadata blocks.
-                                */
-                               if (!useMetaZone) {
-                                       block = NextBitmapBlock(vcb, block);
-                                       if (block >= endingBlock) {
-                                               err = dskFulErr;
-                                               goto Exit;
-                                       }
-                               }
-                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
-                               if (err != noErr) goto Exit;
-                               buffer = currCache;
+       if (err)
+               goto exit;
 
-                               wordsLeft = wordsPerBlock;
-                       }
-                       currentWord = SWAP_BE32 (*buffer);
-               }
-       }
+mark_allocated:
 
-       //      Did we get to the end of the bitmap before finding a free block?
-       //      If so, then couldn't allocate anything.
-       if (block >= endingBlock) {
-               err = dskFulErr;
-               goto Exit;
-       }
+       // Handle alignment
+       if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
+               /*
+                * See the comment in FileMgrInternal.h for alignment
+                * semantics.
+                */
+               uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
+                                                        % ap->alignment);
 
-       //      Return the first block in the allocated range
-       *actualStartBlock = block;
-       dirty = true;
-       
-       //      If we could get the desired number of blocks before hitting endingBlock,
-       //      then adjust endingBlock so we won't keep looking.  Ideally, the comparison
-       //      would be (block + maxBlocks) < endingBlock, but that could overflow.  The
-       //      comparison below yields identical results, but without overflow.
-       if (block < (endingBlock-maxBlocks)) {
-               endingBlock = block + maxBlocks;        //      if we get this far, we've found enough
+               // @minBlocks is still the minimum
+               if (extent->blockCount >= minBlocks + rounding)
+                       extent->blockCount -= rounding;
        }
-       
-       // XXXdbg
-       if (hfsmp->jnl) {
-               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+
+       err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
+                                                                        extent->blockCount, flags);
+
+       if (err)
+               goto exit;
+
+       if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0
+               && !ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+               if (ISSET(flags, HFS_ALLOC_FAST_DEV)) {
+#if !HFS_ALLOC_TEST        /* need this guard because this file is compiled outside of the kernel */
+                       hfs_pin_block_range(hfsmp, HFS_PIN_IT,
+                                                               extent->startBlock, extent->blockCount,
+                                                               vfs_context_kernel());
+#endif
+               } else {
+                       _dk_cs_map_t cm = {
+                               .cm_extent = {
+                                       (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize)
+                                        + hfsmp->hfsPlusIOPosOffset),
+                                       hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize)
+                               }
+                       };
+
+                       errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP,
+                                                                         (caddr_t)&cm, 0, vfs_context_current());
+
+                       /*
+                        * Ignore errors for now; we are fully provisioned so in
+                        * theory CoreStorage should be able to handle this
+                        * allocation.  Should we want to change this in future, then
+                        * we should think carefully how we handle errors.  Allowing
+                        * CoreStorage to truncate our allocation is problematic
+                        * because we might have minimum and alignment requirements
+                        * and backing out changes we have already made is
+                        * non-trivial.
+                        */
+
+                       if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) {
+                               printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n",
+                                          err2, cm.cm_bytes_mapped);
+                       }
+               }
        }
 
+       // if we actually allocated something then go update the
+       // various bits of state that we maintain regardless of
+       // whether there was an error (i.e. partial allocations
+       // still need to update things like the free block count).
        //
-       //      Allocate all of the consecutive blocks
-       //
-       while ((currentWord & bitMask) == 0) {
-               //      Allocate this block
-               currentWord |= bitMask;
-               
-               //      Move to the next block.  If no more, then exit.
-               ++block;
-               if (block == endingBlock)
-                       break;
+       if (extent->blockCount != 0) {
+               //
+               //      If we used the volume's roving allocation pointer, then we need to update it.
+               //      Adding in the length of the current allocation might reduce the next allocate
+               //      call by avoiding a re-scan of the already allocated space.  However, the clump
+               //      just allocated can quite conceivably end up being truncated or released when
+               //      the file is closed or its EOF changed.  Leaving the allocation pointer at the
+               //      start of the last allocation will avoid unnecessary fragmentation in this case.
+               //
+               hfs_lock_mount (hfsmp);
 
-               //      Next bit
-               bitMask >>= 1;
-               if (bitMask == 0) {
-                       *buffer = SWAP_BE32 (currentWord);                                      //      update value in bitmap
-                       
-                       //      Next word
-                       bitMask = kHighBitInWordMask;
-                       ++buffer;
-                       
-                       if (--wordsLeft == 0) {
-                               //      Next block
-                               buffer = currCache = NULL;
-                               err = ReleaseBitmapBlock(vcb, blockRef, true);
-                               if (err != noErr) goto Exit;
+               if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
+                       lck_spin_lock(&hfsmp->vcbFreeExtLock);
+                       if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
+                               hfsmp->sparseAllocation = extent->startBlock;
+                       }
+                       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+                       if (extent->blockCount < hfsmp->hfs_freed_block_count) {
+                               hfsmp->hfs_freed_block_count -= extent->blockCount;
+                       } else {
+                               hfsmp->hfs_freed_block_count = 0;
+                       }
 
-                               /*
-                                * Skip over metadata blocks.
-                                */
-                               if (!useMetaZone) {
-                                       UInt32 nextBlock;
+                       if (updateAllocPtr &&
+                               ((extent->startBlock < hfsmp->hfs_metazone_start) ||
+                                (extent->startBlock > hfsmp->hfs_metazone_end))) {
+                               HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
+                       }
 
-                                       nextBlock = NextBitmapBlock(vcb, block);
-                                       if (nextBlock != block) {
-                                               goto Exit;  /* allocation gap, so stop */
-                                       }
-                               }
+                       (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
+               }
 
-                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
-                               if (err != noErr) goto Exit;
-                               buffer = currCache;
+               if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
+                       (*ap->reservation_in)->rl_start += extent->blockCount;
+                       hfsmp->tentativeBlocks -= extent->blockCount;
+                       if (rl_len(*ap->reservation_in) <= 0)
+                               hfs_free_tentative(hfsmp, ap->reservation_in);
+               } else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
+                       // Handle committing locked extents
+                       assert(hfsmp->lockedBlocks >= extent->blockCount);
+                       (*ap->reservation_in)->rl_start += extent->blockCount;
+                       hfsmp->lockedBlocks -= extent->blockCount;
+                       hfs_free_locked(hfsmp, ap->reservation_in);
+               }
+
+               /* 
+                * Update the number of free blocks on the volume 
+                *
+                * Skip updating the free blocks count if the block are 
+                * being allocated to relocate data as part of hfs_truncatefs()
+                */
+
+               if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+                       hfsmp->tentativeBlocks += extent->blockCount;
+               } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
+                       hfsmp->lockedBlocks += extent->blockCount;
+               } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+                       hfsmp->freeBlocks -= extent->blockCount;
+               }
+               MarkVCBDirty(hfsmp);
+               hfs_unlock_mount(hfsmp);
+
+               hfs_generate_volume_notifications(hfsmp);
+
+               if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
+                       add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, 
+                                                                0, ap->reservation_out);
+               } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
+                       add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, 
+                                                                1, ap->reservation_out);
+               }
 
-                               // XXXdbg
-                               if (hfsmp->jnl) {
-                                       journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+               if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
+                       /*
+                        * See if we used tentative blocks.  Note that we cannot
+                        * free the reservations here because we don't have access
+                        * to the external pointers.  All we can do is update the
+                        * reservations and they'll be cleaned up when whatever is
+                        * holding the pointers calls us back.
+                        *
+                        * We use the rangelist code to detect overlaps and
+                        * constrain the tentative block allocation.  Note that
+                        * @end is inclusive so that our rangelist code will
+                        * resolve the various cases for us.  As a result, we need
+                        * to ensure that we account for it properly when removing
+                        * the blocks from the tentative count in the mount point
+                        * and re-inserting the remainder (either head or tail)
+                        */
+                       struct rl_entry *range, *next_range;
+                       struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
+                       const uint32_t start = extent->startBlock;
+                       const uint32_t end = start + extent->blockCount - 1;
+                       TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
+                               switch (rl_overlap(range, start, end)) {
+                                       case RL_OVERLAPCONTAINSRANGE:
+                                               // Keep the bigger part
+                                               if (start - range->rl_start > range->rl_end - end) {
+                                                       // Discard the tail
+                                                       hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
+                                                       hfs_release_summary(hfsmp, end + 1, range->rl_end - end);
+                                                       const uint32_t old_end = range->rl_end;
+                                                       range->rl_end = start - 1;
+                                                       add_free_extent_cache(hfsmp, end + 1, old_end - end);
+                                               } else {
+                                                       // Discard the head
+                                                       hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
+                                                       hfs_release_summary(hfsmp, range->rl_start,
+                                                                                               start - range->rl_start);
+                                                       const uint32_t old_start = range->rl_start;
+                                                       range->rl_start = end + 1;
+                                                       add_free_extent_cache(hfsmp, old_start,
+                                                                                                 start - old_start);
+                                               }
+                                               assert(range->rl_end >= range->rl_start);
+                                               break;
+                                       case RL_MATCHINGOVERLAP:
+                                       case RL_OVERLAPISCONTAINED:
+                                               hfsmp->tentativeBlocks -= rl_len(range);
+                                               range->rl_end = range->rl_start - 1;
+                                               hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+                                               break;
+                                       case RL_OVERLAPSTARTSBEFORE:
+                                               hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
+                                               range->rl_end = start - 1;
+                                               assert(range->rl_end >= range->rl_start);
+                                               break;
+                                       case RL_OVERLAPENDSAFTER:
+                                               hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
+                                               range->rl_start = end + 1;
+                                               assert(range->rl_end >= range->rl_start);
+                                               break;
+                                       case RL_NOOVERLAP:
+                                               break;
                                }
-                               
-                               wordsLeft = wordsPerBlock;
                        }
-                       
-                       currentWord = SWAP_BE32 (*buffer);
                }
        }
-       *buffer = SWAP_BE32 (currentWord);                                                      //      update the last change
 
-Exit:
-       if (err == noErr) {
-               *actualNumBlocks = block - *actualStartBlock;
+exit:
 
-       // sanity check
-       if ((*actualStartBlock + *actualNumBlocks) > vcb->totalBlocks)
-               panic("BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
+       if (ALLOC_DEBUG) {
+               if (err == noErr) {
+                       if (extent->startBlock >= hfsmp->totalBlocks) {
+                               panic ("BlockAllocate: vending invalid blocks!");
+                       }
+                       if (extent->startBlock >= hfsmp->allocLimit) {
+                               panic ("BlockAllocate: vending block past allocLimit!");
+                       }
+
+                       if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) {  
+                               panic ("BlockAllocate: vending too many invalid blocks!");
+                       }
+
+                       if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) {   
+                               panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
+                       }
+               }
        }
-       else {
-               *actualStartBlock = 0;
-               *actualNumBlocks = 0;
+
+       if (err) {
+               // Just to be safe...
+               extent->startBlock = 0;
+               extent->blockCount = 0;
        }
-       
-    if (currCache)
-       (void) ReleaseBitmapBlock(vcb, blockRef, dirty);
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0);
 
        return err;
 }
 
 
 /*
-_______________________________________________________________________
-
-Routine:       BlockAllocateKnown
-
-Function:      Try to allocate space from known free space in the free
-                       extent cache.
-
-Inputs:
-       vcb                             Pointer to volume where space is to be allocated
-       maxBlocks               Maximum number of contiguous blocks to allocate
-
-Outputs:
-       actualStartBlock        First block of range allocated, or 0 if error
-       actualNumBlocks         Number of blocks allocated, or 0 if error
-
-Returns:
-       dskFulErr               Free extent cache is empty
-_______________________________________________________________________
+;________________________________________________________________________________
+;
+; Routine:        BlockDeallocate
+;
+; Function:    Update the bitmap to deallocate a run of disk allocation blocks
+;
+; Input Arguments:
+;       vcb            - Pointer to ExtendedVCB for the volume to free space on
+;       firstBlock     - First allocation block to be freed
+;       numBlocks      - Number of allocation blocks to free up (must be > 0!)
+;
+; Output:
+;       (result)       - Result code
+;
+; Side effects:
+;       The volume bitmap is read and updated; the volume bitmap cache may be changed.
+;       The Allocator's red-black trees may also be modified as a result.
+;
+;________________________________________________________________________________
 */
-static OSErr BlockAllocateKnown(
-       ExtendedVCB             *vcb,
-       UInt32                  maxBlocks,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks)
+
+OSErr BlockDeallocate (
+               ExtendedVCB             *vcb,                   //      Which volume to deallocate space on
+               u_int32_t               firstBlock,             //      First block in range to deallocate
+               u_int32_t               numBlocks,              //      Number of contiguous blocks to deallocate
+               hfs_block_alloc_flags_t flags)
 {
-       UInt32                  i;
-       UInt32                  foundBlocks;
-       UInt32                  newStartBlock, newBlockCount;
-       
-       if (vcb->vcbFreeExtCnt == 0)
-               return dskFulErr;
+       if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
+               return 0;
 
-       //      Just grab up to maxBlocks of the first (largest) free exent.
-       *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
-       foundBlocks = vcb->vcbFreeExt[0].blockCount;
-       if (foundBlocks > maxBlocks)
-               foundBlocks = maxBlocks;
-       *actualNumBlocks = foundBlocks;
-       
-       //      Adjust the start and length of that extent.
-       newStartBlock = vcb->vcbFreeExt[0].startBlock + foundBlocks;
-       newBlockCount = vcb->vcbFreeExt[0].blockCount - foundBlocks;
-       
-       //      The first extent might not be the largest anymore.  Bubble up any
-       //      (now larger) extents to the top of the list.
-       for (i=1; i<vcb->vcbFreeExtCnt; ++i)
-       {
-               if (vcb->vcbFreeExt[i].blockCount > newBlockCount)
-               {
-                       vcb->vcbFreeExt[i-1].startBlock = vcb->vcbFreeExt[i].startBlock;
-                       vcb->vcbFreeExt[i-1].blockCount = vcb->vcbFreeExt[i].blockCount;
-               }
-               else
-               {
-                       break;
-               }
-       }
-       
-       //      If this is now the smallest known free extent, then it might be smaller than
-       //      other extents we didn't keep track of.  So, just forget about this extent.
-       //      After the previous loop, (i-1) is the index of the extent we just allocated from.
-       if (i == vcb->vcbFreeExtCnt)
-       {
-               //      It's now the smallest extent, so forget about it
-               --vcb->vcbFreeExtCnt;
-       }
-       else
-       {
-               //      It's not the smallest, so store it in its proper place
-               vcb->vcbFreeExt[i-1].startBlock = newStartBlock;
-               vcb->vcbFreeExt[i-1].blockCount = newBlockCount;
-       }
-       
-       // sanity check
-       if ((*actualStartBlock + *actualNumBlocks) > vcb->totalBlocks)
-               panic("BlockAllocateKnown: allocation overflow on \"%s\"", vcb->vcbVN);
+       OSErr                   err;
+       struct hfsmount *hfsmp;
+       hfsmp = VCBTOHFS(vcb);
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
 
        //
-       //      Now mark the found extent in the bitmap
+       //      If no blocks to deallocate, then exit early
        //
-       return BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks);
-}
+       if (numBlocks == 0) {
+               err = noErr;
+               goto Exit;
+       }
 
 
+       if (ALLOC_DEBUG) {
+               if (firstBlock >= hfsmp->totalBlocks) {
+                       panic ("BlockDeallocate: freeing invalid blocks!");
+               }
 
-/*
-_______________________________________________________________________
+               if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {   
+                       panic ("BlockDeallocate: freeing too many invalid blocks!");
+               }                       
+       }
 
-Routine:       BlockMarkAllocated
+       /*
+        * If we're using the summary bitmap, then try to mark the bits
+        * as potentially usable/free before actually deallocating them.
+        * It is better to be slightly speculative here for correctness.
+        */
 
-Function:      Mark a contiguous group of blocks as allocated (set in the
-                       bitmap).  It assumes those bits are currently marked
-                       deallocated (clear in the bitmap).
+       (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
 
-Inputs:
-       vcb                             Pointer to volume where space is to be allocated
-       startingBlock   First block number to mark as allocated
-       numBlocks               Number of blocks to mark as allocated
-_______________________________________________________________________
-*/
-__private_extern__
-OSErr BlockMarkAllocated(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       register UInt32 numBlocks)
-{
-       OSErr                   err;
-       register UInt32 *currentWord;   //      Pointer to current word within bitmap block
-       register UInt32 wordsLeft;              //      Number of words left in this bitmap block
-       register UInt32 bitMask;                //      Word with given bits already set (ready to OR in)
-       UInt32                  firstBit;               //      Bit index within word of first bit to allocate
-       UInt32                  numBits;                //      Number of bits in word to allocate
-       UInt32                  *buffer = NULL;
-       UInt32  blockRef;
-       UInt32  bitsPerBlock;
-       UInt32  wordsPerBlock;
-       // XXXdbg
-       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
 
+       if (err) {
+               goto Exit;
+       }
 
        //
-       //      Pre-read the bitmap block containing the first word of allocation
+       //      Update the volume's free block count, and mark the VCB as dirty.
        //
+       hfs_lock_mount(hfsmp);
+       /* 
+        * Do not update the free block count.  This flags is specified 
+        * when a volume is being truncated.  
+        */
+       if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
+               vcb->freeBlocks += numBlocks;
+       }
 
-       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-       if (err != noErr) goto Exit;
-       //
-       //      Initialize currentWord, and wordsLeft.
-       //
-       {
-               UInt32 wordIndexInBlock;
-               
-               bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
-               wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+       vcb->hfs_freed_block_count += numBlocks;
 
-               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-               currentWord = buffer + wordIndexInBlock;
-               wordsLeft = wordsPerBlock - wordIndexInBlock;
-       }
-       
-       // XXXdbg
-       if (hfsmp->jnl) {
-               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+       if (vcb->nextAllocation == (firstBlock + numBlocks)) {
+               HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
        }
 
-       //
-       //      If the first block to allocate doesn't start on a word
-       //      boundary in the bitmap, then treat that first word
-       //      specially.
-       //
+       if (hfsmp->jnl == NULL) {
+               /*
+                * In the journal case, we'll add the free extent once the journal
+                * calls us back to tell us it wrote the transaction to disk.
+                */
+               (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
 
-       firstBit = startingBlock % kBitsPerWord;
-       if (firstBit != 0) {
-               bitMask = kAllBitsSetInWord >> firstBit;        //      turn off all bits before firstBit
-               numBits = kBitsPerWord - firstBit;                      //      number of remaining bits in this word
-               if (numBits > numBlocks) {
-                       numBits = numBlocks;                                    //      entire allocation is inside this one word
-                       bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));        //      turn off bits after last
-               }
-#if DEBUG_BUILD
-               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-                       panic("BlockMarkAllocated: blocks already allocated!");
+               /*
+                * If the journal case, we'll only update sparseAllocation once the
+                * free extent cache becomes empty (when we remove the last entry
+                * from the cache).  Skipping it here means we're less likely to
+                * find a recently freed extent via the bitmap before it gets added
+                * to the free extent cache.
+                */
+               if (firstBlock < vcb->sparseAllocation) {
+                       vcb->sparseAllocation = firstBlock;
                }
-#endif
-               *currentWord |= SWAP_BE32 (bitMask);            //      set the bits in the bitmap
-               numBlocks -= numBits;                                           //      adjust number of blocks left to allocate
-
-               ++currentWord;                                                          //      move to next word
-               --wordsLeft;                                                            //      one less word left in this block
        }
 
-       //
-       //      Allocate whole words (32 blocks) at a time.
-       //
+       MarkVCBDirty(vcb);
+       hfs_unlock_mount(hfsmp);
 
-       bitMask = kAllBitsSetInWord;                                    //      put this in a register for 68K
-       while (numBlocks >= kBitsPerWord) {
-               if (wordsLeft == 0) {
+       hfs_generate_volume_notifications(VCBTOHFS(vcb));
+Exit:
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+       return err;
+}
+
+
+u_int8_t freebitcount[16] = {
+       4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
+       3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
+};
+
+u_int32_t
+MetaZoneFreeBlocks(ExtendedVCB *vcb)
+{
+       u_int32_t freeblocks;
+       u_int32_t *currCache;
+       uintptr_t blockRef;
+       u_int32_t bit;
+       u_int32_t lastbit;
+       int bytesleft;
+       int bytesperblock;
+       u_int8_t byte;
+       u_int8_t *buffer;
+
+       blockRef = 0;
+       bytesleft = freeblocks = 0;
+       buffer = NULL;
+       bit = VCBTOHFS(vcb)->hfs_metazone_start;
+       if (bit == 1)
+               bit = 0;
+
+       lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
+       bytesperblock = vcb->vcbVBMIOSize;
+
+       /*
+        *  Count all the bits from bit to lastbit.
+        */
+       while (bit < lastbit) {
+               /*
+                *  Get next bitmap block.
+                */
+               if (bytesleft == 0) {
+                       if (blockRef) {
+                               (void) ReleaseBitmapBlock(vcb, blockRef, false);
+                               blockRef = 0;
+                       }
+                       if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef, 
+                                                               HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
+                               return (0);
+                       }
+                       buffer = (u_int8_t *)currCache;
+                       bytesleft = bytesperblock;
+               }
+               byte = *buffer++;
+               freeblocks += freebitcount[byte & 0x0F];
+               freeblocks += freebitcount[(byte >> 4) & 0x0F];
+               bit += kBitsPerByte;
+               --bytesleft;
+       }
+       if (blockRef)
+               (void) ReleaseBitmapBlock(vcb, blockRef, false);
+
+       return (freeblocks);
+}
+
+
+/*
+ * Obtain the next allocation block (bit) that's
+ * outside the metadata allocation zone.
+ */
+static u_int32_t NextBitmapBlock(
+               ExtendedVCB             *vcb,
+               u_int32_t               bit)
+{
+       struct  hfsmount *hfsmp = VCBTOHFS(vcb);
+
+       if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
+               return (bit);
+       /*
+        * Skip over metadata allocation zone.
+        */
+       if ((bit >= hfsmp->hfs_metazone_start) &&
+                       (bit <= hfsmp->hfs_metazone_end)) {
+               bit = hfsmp->hfs_metazone_end + 1;
+       }
+       return (bit);
+}
+
+
+// Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes.
+static void bits_set(void *bitmap, int start, int end)
+{
+       const int start_bit = start & 63;
+       const int end_bit   = end   & 63;
+
+#define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
+#define RIGHT_MASK(bit)        OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)
+
+       uint64_t *p = (uint64_t *)bitmap + start / 64;
+
+       if ((start & ~63) == (end & ~63)) {
+               // Start and end in same 64 bits
+               *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
+       } else {
+               *p++ |= RIGHT_MASK(start_bit);
+
+               int nquads = (end - end_bit - start - 1) / 64;
+
+               while (nquads--)
+                       *p++ = 0xffffffffffffffffull;
+
+               if (end_bit)
+                       *p |= LEFT_MASK(end_bit);
+       }
+}
+
+// Modifies the buffer and applies any reservations that we might have
+static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset,
+                                                                 hfs_block_alloc_flags_t flags,
+                                                                 bool always_copy)
+{
+       bool taken_copy = false;
+       void *buffer = (void *)buf_dataptr(bp);
+       const uint32_t nbytes = buf_count(bp);
+       const off_t end = offset + nbytes * 8 - 1;
+
+       for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
+                                 ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
+               struct rl_entry *entry;
+               TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+                       uint32_t a, b;
+
+                       enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);
+
+                       if (overlap_type == RL_NOOVERLAP)
+                               continue;
+
+                       /*
+                        * If always_copy is false, we only take a copy if B_LOCKED is
+                        * set because ReleaseScanBitmapRange doesn't invalidate the
+                        * buffer in that case.
+                        */
+                       if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
+                               buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
+                               buf_brelse(bp);
+                               bp = new_bp;
+                               buf_setflags(bp, B_NOCACHE);
+                               buffer = (void *)buf_dataptr(bp);
+                               taken_copy = true;
+                       }
+
+                       switch (overlap_type) {
+                       case RL_OVERLAPCONTAINSRANGE:
+                       case RL_MATCHINGOVERLAP:
+                               memset(buffer, 0xff, nbytes);
+                               return bp;
+                       case RL_OVERLAPISCONTAINED:
+                               a = entry->rl_start;
+                               b = entry->rl_end;
+                               break;
+                       case RL_OVERLAPSTARTSBEFORE:
+                               a = offset;
+                               b = entry->rl_end;
+                               break;
+                       case RL_OVERLAPENDSAFTER:
+                               a = entry->rl_start;
+                               b = end;
+                               break;
+                       case RL_NOOVERLAP:
+                               __builtin_unreachable();
+                       }
+
+                       a -= offset;
+                       b -= offset;
+
+                       assert(a < buf_count(bp) * 8);
+                       assert(b < buf_count(bp) * 8);
+                       assert(b >= a);
+
+                       // b is inclusive
+                       bits_set(buffer, a, b + 1);
+               }
+       } // for (;;)
+
+       return bp;
+}
+
+/*
+;_______________________________________________________________________
+;
+; Routine:     ReadBitmapBlock
+;
+; Function:    Read in a bitmap block corresponding to a given allocation
+;                      block (bit).  Return a pointer to the bitmap block.
+;
+; Inputs:
+;      vcb                     --      Pointer to ExtendedVCB
+;      bit                     --      Allocation block whose bitmap block is desired
+;
+; Outputs:
+;      buffer          --      Pointer to bitmap block corresonding to "block"
+;      blockRef
+;_______________________________________________________________________
+*/
+static OSErr ReadBitmapBlock(ExtendedVCB               *vcb,
+                                                        u_int32_t              bit,
+                                                        u_int32_t              **buffer,
+                                                        uintptr_t              *blockRef,
+                                                        hfs_block_alloc_flags_t flags)
+{
+       OSErr                   err;
+       struct buf *bp = NULL;
+       struct vnode *vp = NULL;
+       daddr64_t block;
+       u_int32_t blockSize;
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
+
+       /*
+        * volume bitmap blocks are protected by the allocation file lock
+        */
+       REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);       
+
+       blockSize = (u_int32_t)vcb->vcbVBMIOSize;
+       block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
+
+       /* HFS+ / HFSX */
+       if (vcb->vcbSigWord != kHFSSigWord) {
+               vp = vcb->hfs_allocation_vp;    /* use allocation file vnode */
+       } 
+#if CONFIG_HFS_STD
+       else {
+               /* HFS Standard */      
+               vp = VCBTOHFS(vcb)->hfs_devvp;  /* use device I/O vnode */
+               block += vcb->vcbVBMSt;                 /* map to physical block */
+       }
+#endif
+
+       err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
+
+       if (bp) {
+               if (err) {
+                       buf_brelse(bp);
+                       *blockRef = 0;
+                       *buffer = NULL;
+               } else {
+                       if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
+                               bp = process_reservations(vcb, bp, block * blockSize * 8,
+                                                                                 flags, /* always_copy: */ true);
+                       }
+
+                       buf_setfsprivate(bp, (void *)(uintptr_t)flags);
+
+                       *blockRef = (uintptr_t)bp;
+                       *buffer = (u_int32_t *)buf_dataptr(bp);
+               }
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+       return err;
+}
+
+
+/*
+;_______________________________________________________________________
+;
+; Routine:     ReadBitmapRange
+;
+; Function:    Read in a range of the bitmap starting at the given offset. 
+;                      Use the supplied size to determine the amount of I/O to generate
+;                      against the bitmap file. Return a pointer to the bitmap block.
+;
+; Inputs:
+;      hfsmp           --      Pointer to hfs mount
+;      offset          --      byte offset into the bitmap file 
+;      size            --  How much I/O to generate against the bitmap file.
+;
+; Outputs:
+;      buffer          --      Pointer to bitmap block data corresonding to "block"
+;      blockRef        --  struct 'buf' pointer which MUST be released in a subsequent call.
+;_______________________________________________________________________
+*/
+static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
+               uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
+{
+
+       OSErr                   err;
+       struct buf *bp = NULL;
+       struct vnode *vp = NULL;
+       daddr64_t block;
+
+       /* This function isn't supported for HFS standard */
+       if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
+               return EINVAL;
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
+       }
+
+       /*
+        * volume bitmap blocks are protected by the allocation file lock
+        */
+       REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);       
+
+       vp = hfsmp->hfs_allocation_vp;  /* use allocation file vnode */
+
+       /*
+        * The byte offset argument must be converted into bitmap-relative logical 
+        * block numbers before using it in buf_meta_bread.
+        * 
+        * buf_meta_bread (and the things it calls) will eventually try to
+        * reconstruct the byte offset into the file by multiplying the logical 
+        * block number passed in below by the vcbVBMIOSize field in the mount
+        * point.  So we prepare for that by converting the byte offset back into
+        * logical blocks in terms of VBMIOSize units.
+        * 
+        * The amount of I/O requested and the byte offset should be computed 
+        * based on the helper function in the frame that called us, so we can
+        * get away with just doing a simple divide here.
+        */
+       block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
+
+       err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
+
+       if (bp) {
+               if (err) {
+                       buf_brelse(bp);
+                       *blockRef = 0;
+                       *buffer = NULL;
+               } else {
+                       bp = process_reservations(hfsmp, bp, (offset * 8), 0,
+                                                                         /* always_copy: */ false);
+
+                       *blockRef = bp;
+                       *buffer = (u_int32_t *)buf_dataptr(bp);
+               }
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
+       }
+
+       return err;
+}
+
+
+/*
+;_______________________________________________________________________
+;
+; Routine:     ReleaseBitmapBlock
+;
+; Function:    Relase a bitmap block. 
+;
+; Inputs:
+;      vcb
+;      blockRef
+;      dirty
+;_______________________________________________________________________
+*/
+static OSErr ReleaseBitmapBlock(
+               ExtendedVCB             *vcb,
+               uintptr_t               blockRef,
+               Boolean                 dirty)
+{
+       struct buf *bp = (struct buf *)blockRef;
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
+
+       if (blockRef == 0) {
+               if (dirty)
+                       panic("hfs: ReleaseBitmapBlock: missing bp");
+               return (0);
+       }
+
+       if (bp) {
+               if (dirty) {
+                       hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp);
+
+                       if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
+                               panic("Modified read-only bitmap buffer!");
+
+                       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+                       if (hfsmp->jnl) {
+                               journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
+                       } else {
+                               buf_bdwrite(bp);
+                       }
+               } else {
+                       buf_brelse(bp);
+               }
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+       return (0);
+}
+
+/*
+ * ReleaseScanBitmapRange
+ *
+ * This is used to release struct bufs that were created for use by 
+ * bitmap scanning code.  Because they may be of sizes different than the
+ * typical runtime manipulation code, we want to force them to be purged out 
+ * of the buffer cache ASAP, so we'll release them differently than in the 
+ * ReleaseBitmapBlock case.  
+ *
+ * Additionally, because we know that we're only reading the blocks and that they
+ * should have been clean prior to reading them, we will never 
+ * issue a write to them (thus dirtying them).
+ */
+
+static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, 0, 0, 0, 0, 0);
+       }
+
+       if (bp) {
+               /* Mark the buffer invalid if it isn't locked, then release it */
+               if ((buf_flags(bp) & B_LOCKED) == 0) {
+                       buf_markinvalid(bp);
+               }
+               buf_brelse(bp);
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+               KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
+       }
+
+       return (0);
+}
+
+/* 
+ * @extent.startBlock, on input, contains a preferred block for the
+ * allocation.  @extent.blockCount, on input, contains the minimum
+ * number of blocks acceptable.  Upon success, the result is conveyed
+ * in @extent.
+ */
+static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
+                                                               HFSPlusExtentDescriptor *extent,
+                                                               uint32_t max_blocks,
+                                                               hfs_block_alloc_flags_t flags)
+{
+       OSErr err = dskFulErr;
+
+       const uint32_t min_blocks = extent->blockCount;
+
+       // It's > rather than >= because the last block is always reserved
+       if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
+               && hfsmp->allocLimit - extent->startBlock > max_blocks) {
+               /*
+                * This is just checking to see if there's an extent starting
+                * at extent->startBlock that will suit.  We only check for
+                * @max_blocks here; @min_blocks is ignored.
+                */
+
+               err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
+                                                                 max_blocks, max_blocks, true, true,
+                                                                 &extent->startBlock, &extent->blockCount, flags);
+
+               if (err != dskFulErr)
+                       return err;
+       }
+
+       err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
+                                               &extent->blockCount);
+
+       if (!err) {
+               if (extent->blockCount >= max_blocks)
+                       return 0;
+       } else if (err != dskFulErr)
+               return err;
+
+       // Try a more exhaustive search
+       return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
+                                                          min_blocks, max_blocks,
+                                                          /* useMetaZone: */ true,
+                                                          /* trustSummary: */ true,
+                                                          &extent->startBlock, &extent->blockCount, flags);
+}
+
+/*
+_______________________________________________________________________
+
+Routine:       BlockFindContig
+
+Function:   Find a contiguous group of allocation blocks.  If the
+                       minimum cannot be satisfied, nothing is returned.  The
+                       caller guarantees that there are enough free blocks
+                       (though they may not be contiguous, in which case this
+                       call will fail).
+
+Inputs:
+       vcb                             Pointer to volume where space is to be allocated
+       startingBlock   Preferred first block for allocation
+       minBlocks               Minimum number of contiguous blocks to allocate
+       maxBlocks               Maximum number of contiguous blocks to allocate
+       flags
+
+Outputs:
+       actualStartBlock        First block of range allocated, or 0 if error
+       actualNumBlocks         Number of blocks allocated, or 0 if error
+_______________________________________________________________________
+*/
+static OSErr BlockFindContig(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               minBlocks,
+               u_int32_t               maxBlocks,
+               hfs_block_alloc_flags_t flags,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks)
+{
+       OSErr retval = noErr;
+       uint32_t currentStart = startingBlock;
+
+       uint32_t foundStart = 0; // values to emit to caller
+       uint32_t foundCount = 0;
+
+       uint32_t collision_start = 0;  // if we have to re-allocate a recently deleted extent, use this
+       uint32_t collision_count = 0;
+
+       int err;
+       int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
+       Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
+
+       int recently_deleted = 0;
+       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
+
+       while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
+
+               /* Try and find something that works. */
+
+               /*
+                * NOTE: If the only contiguous free extent of at least minBlocks
+                * crosses startingBlock (i.e. starts before, ends after), then we
+                * won't find it. Earlier versions *did* find this case by letting
+                * the second search look past startingBlock by minBlocks.  But
+                * with the free extent cache, this can lead to duplicate entries
+                * in the cache, causing the same blocks to be allocated twice.
+                */
+               retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, 
+                               maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);
+
+               if (retval == dskFulErr && currentStart != 0) {
+                       /*
+                        * We constrain the endingBlock so we don't bother looking for ranges
+                        * that would overlap those found in the previous call, if the summary bitmap
+                        * is not on for this volume.  If it is, then we assume that it was not trust
+                        * -worthy and do a full scan.
+                        */
+                       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                               retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, 
+                                               maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
+                       }
+                       else {
+                               retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
+                                               maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
+                       }
+               }
+
+               if (retval != noErr) {
+                       goto bailout;
+               }
+
+               /* Do we overlap with the recently found collision extent? */
+               if (collision_start) {
+                       if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
+                               /* 
+                                * We've looped around, and the only thing we could use was the collision extent.
+                                * Since we are allowed to use it, go ahead and do so now.
+                                */
+                               if(allowReuse) {
+                                       /* 
+                                        * then we couldn't find anything except values which might have been 
+                                        * recently deallocated. just return our cached value if we are allowed to.
+                                        */
+                                       foundStart = collision_start;
+                                       foundCount = collision_count;
+                                       goto bailout;
+                               }
+                               else {
+                                       /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
+                                       retval = dskFulErr;
+                                       goto bailout;
+                               }       
+                       }
+               }
+
+               /* OK, we know we must not have collided . See if this one is recently deleted */
+               if (hfsmp->jnl) {
+                       recently_deleted = 0;
+                       uint32_t nextStart;
+                       err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
+                                       (uint64_t) foundCount, &recently_deleted, &nextStart);
+                       if (err == 0) {
+                               if(recently_deleted != 0) {
+                                       /* 
+                                        * these blocks were recently deleted/deallocated.  Cache the extent, but
+                                        * but keep searching to see if we can find one that won't collide here. 
+                                        */
+                                       if (collision_start == 0) {
+                                               collision_start = foundStart;
+                                               collision_count = foundCount;
+                                       }
+                                       recently_deleted = 0;
+
+                                       /* 
+                                        * advance currentStart to the point just past the overlap we just found. Note that 
+                                        * we will automatically loop around to start of the bitmap as needed.
+                                        */
+                                       currentStart = nextStart;
+                                       /* Unset foundStart/Count to allow it to loop around again. */
+                                       foundStart = 0;
+                                       foundCount = 0;
+                               }
+                       }
+               } // end jnl/deleted case
+
+               /* 
+                * If we found something good, we'd break out of the loop at the top; foundCount
+                * and foundStart should be set.
+                */
+
+       } // end while loop. 
+
+bailout:
+
+       if (retval == noErr) {
+               *actualStartBlock = foundStart;
+               *actualNumBlocks = foundCount;
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0);
+
+       return retval;
+
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine:       BlockFindAny
+
+Function: Find one or more allocation blocks and may return fewer than
+          requested.  The caller guarantees that there is at least one
+          free block.
+
+Inputs:
+       vcb                             Pointer to volume where space is to be allocated
+       startingBlock   Preferred first block for allocation
+       endingBlock             Last block to check + 1
+       maxBlocks               Maximum number of contiguous blocks to allocate
+       useMetaZone
+
+Outputs:
+       actualStartBlock        First block of range allocated, or 0 if error
+       actualNumBlocks         Number of blocks allocated, or 0 if error
+_______________________________________________________________________
+*/
+
+static OSErr BlockFindAny(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               register u_int32_t      endingBlock,
+               u_int32_t               maxBlocks,
+               hfs_block_alloc_flags_t flags,
+               Boolean                 trustSummary,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks)
+{
+
+       /*
+        * If it is enabled, scan through the summary table to find the first free block.
+        *
+        * If it reports that there are not any free blocks, we could have a false
+        * positive, so in that case, use the input arguments as a pass through.
+        */
+       uint32_t start_blk  = startingBlock;
+       uint32_t end_blk = endingBlock;
+       struct hfsmount *hfsmp;
+       OSErr err;
+
+       hfsmp = (struct hfsmount*)vcb;
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               uint32_t suggested_start;
+
+               /* 
+                * If the summary table is enabled, scan through it to find the first free 
+                * block.  If there was an error, or we couldn't find anything free in the
+                * summary table, then just leave the start_blk fields unmodified. We wouldn't
+                * have gotten to this point if the mount point made it look like there was possibly
+                * free space in the FS. 
+                */
+               err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
+               if (err == 0) {
+                       start_blk = suggested_start;
+               }
+               else {
+                       /* Differentiate between ENOSPC and a more esoteric error in the above call. */
+                       if ((err == ENOSPC) && (trustSummary)) {
+                               /* 
+                                * The 'trustSummary' argument is for doing a full scan if we really
+                                * really, need the space and we think it's somewhere but can't find it in the
+                                * summary table. If it's true, then we trust the summary table and return 
+                                * dskFulErr if we couldn't find it above.
+                                */
+                               return dskFulErr;
+                       }
+                       /* 
+                        * If either trustSummary was false or we got a different errno, then we
+                        * want to fall through to the real bitmap single i/o code...
+                        */ 
+               }
+       }
+
+       err =  BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
+                       flags, actualStartBlock, actualNumBlocks);
+
+       return err;
+}
+
+
+/*
+ * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
+ * figure out where the free allocation blocks are.  Inputs and
+ * outputs are the same as for BlockFindAny.
+ */
+
+static OSErr BlockFindAnyBitmap(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               register u_int32_t      endingBlock,
+               u_int32_t               maxBlocks,
+               hfs_block_alloc_flags_t flags,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks)
+{
+       OSErr                   err;
+       register u_int32_t      block = 0;              //      current block number
+       register u_int32_t      currentWord;    //      Pointer to current word within bitmap block
+       register u_int32_t      bitMask;                //      Word with given bits already set (ready to OR in)
+       register u_int32_t      wordsLeft;              //      Number of words left in this bitmap block
+       u_int32_t  *buffer = NULL;
+       u_int32_t  *currCache = NULL;
+       uintptr_t  blockRef = 0;
+       u_int32_t  bitsPerBlock;
+       u_int32_t  wordsPerBlock;
+       Boolean dirty = false;
+       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
+       Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
+
+restartSearchAny:
+
+       /*
+        * When we're skipping the metadata zone and the start/end
+        * range overlaps with the metadata zone then adjust the 
+        * start to be outside of the metadata zone.  If the range
+        * is entirely inside the metadata zone then we can deny the
+        * request (dskFulErr).
+        */
+       if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
+               if (startingBlock <= vcb->hfs_metazone_end) {
+                       if (endingBlock > (vcb->hfs_metazone_end + 2))
+                               startingBlock = vcb->hfs_metazone_end + 1;
+                       else {
+                               err = dskFulErr;
+                               goto Exit;
+                       }
+               }
+       }
+
+       //      Since this routine doesn't wrap around
+       if (maxBlocks > (endingBlock - startingBlock)) {
+               maxBlocks = endingBlock - startingBlock;
+       }
+
+       //
+       //      Pre-read the first bitmap block
+       //
+       err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
+       if (err != noErr) goto Exit;
+       buffer = currCache;
+
+       //
+       //      Set up the current position within the block
+       //
+       {
+               u_int32_t wordIndexInBlock;
+
+               bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
+               wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+
+               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+               buffer += wordIndexInBlock;
+               wordsLeft = wordsPerBlock - wordIndexInBlock;
+               currentWord = SWAP_BE32 (*buffer);
+               bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
+       }
+
+       /*
+        * While loop 1:
+        *              Find the first unallocated block starting at 'block'
+        */
+       uint32_t summary_block_scan = 0;
+
+       block=startingBlock;
+       while (block < endingBlock) {
+               if ((currentWord & bitMask) == 0)
+                       break;
+
+               //      Next bit
+               ++block;
+               bitMask >>= 1;
+               if (bitMask == 0) {
+                       //      Next word
+                       bitMask = kHighBitInWordMask;
+                       ++buffer;
+
+                       if (--wordsLeft == 0) {
+                               //      Next block
+                               buffer = currCache = NULL;
+                               if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                                       /*
+                                        * If summary_block_scan is non-zero, then we must have
+                                        * pulled a bitmap file block into core, and scanned through
+                                        * the entire thing.  Because we're in this loop, we are 
+                                        * implicitly trusting that the bitmap didn't have any knowledge
+                                        * about this particular block.  As a result, update the bitmap
+                                        * (lazily, now that we've scanned it) with our findings that 
+                                        * this particular block is completely used up.
+                                        */
+                                       if (summary_block_scan != 0) {
+                                               uint32_t summary_bit;
+                                               (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
+                                               hfs_set_summary (hfsmp, summary_bit, 1);
+                                               summary_block_scan = 0;
+                                       }
+                               }
+
+                               err = ReleaseBitmapBlock(vcb, blockRef, false);
+                               if (err != noErr) goto Exit;
+
+                               /*
+                                * Skip over metadata blocks.
+                                */
+                               if (!useMetaZone) {
+                                       block = NextBitmapBlock(vcb, block);
+                               }
+                               if (block >= endingBlock) {
+                                       err = dskFulErr;
+                                       goto Exit;
+                               }
+
+                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
+                               if (err != noErr) goto Exit;
+                               buffer = currCache;
+                               summary_block_scan = block;
+                               wordsLeft = wordsPerBlock;
+                       }
+                       currentWord = SWAP_BE32 (*buffer);
+               }
+       }
+
+       //      Did we get to the end of the bitmap before finding a free block?
+       //      If so, then couldn't allocate anything.
+       if (block >= endingBlock) {
+               err = dskFulErr;
+               goto Exit;
+       }
+
+
+       /* 
+        * Don't move forward just yet.  Verify that either one of the following
+        * two conditions is true:
+        * 1) journaling is not enabled
+        * 2) block is not currently on any pending TRIM list. 
+        */
+       if (hfsmp->jnl != NULL && (forceFlush == false)) {
+               int recently_deleted = 0;
+               uint32_t nextblk;
+               err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
+               if ((err == 0) && (recently_deleted)) {
+
+                       /* release the bitmap block & unset currCache.  we may jump past it. */
+                       err = ReleaseBitmapBlock(vcb, blockRef, false);
+                       currCache = NULL;
+                       if (err != noErr) {
+                               goto Exit;
+                       }
+                       /* set our start to nextblk, and re-do the search. */
+                       startingBlock = nextblk;
+                       goto restartSearchAny;
+               }
+       }
+
+
+       //      Return the first block in the allocated range
+       *actualStartBlock = block;
+       dirty = true;
+
+       //      If we could get the desired number of blocks before hitting endingBlock,
+       //      then adjust endingBlock so we won't keep looking.  Ideally, the comparison
+       //      would be (block + maxBlocks) < endingBlock, but that could overflow.  The
+       //      comparison below yields identical results, but without overflow.
+       if (block < (endingBlock-maxBlocks)) {
+               endingBlock = block + maxBlocks;        //      if we get this far, we've found enough
+       }
+
+       /*
+        * While loop 2:
+        *              Scan the bitmap, starting at 'currentWord' in the current
+        *              bitmap block.  Continue iterating through the bitmap until
+        *              either we hit an allocated block, or until we have accumuluated
+        *              maxBlocks worth of bitmap.
+        */
+       
+       /* Continue until we see an allocated block */
+       while ((currentWord & bitMask) == 0) {  
+               //      Move to the next block.  If no more, then exit.
+               ++block;
+               if (block == endingBlock) {
+                       break;
+               }
+
+               //      Next bit
+               bitMask >>= 1;
+               if (bitMask == 0) {
+                       //      Next word
+                       bitMask = kHighBitInWordMask;
+                       ++buffer;
+
+                       if (--wordsLeft == 0) {
+                               //      Next block
+                               buffer = currCache = NULL;
+
+                               /* We're only reading the bitmap here, so mark it as clean */
+                               err = ReleaseBitmapBlock(vcb, blockRef, false);
+                               if (err != noErr) {
+                                       goto Exit;
+                               }
+
+                               /*
+                                * Skip over metadata blocks.
+                                */
+                               if (!useMetaZone) {
+                                       u_int32_t nextBlock;
+                                       nextBlock = NextBitmapBlock(vcb, block);
+                                       if (nextBlock != block) {
+                                               goto Exit;  /* allocation gap, so stop */
+                                       }
+                               }
+
+                               if (block >= endingBlock) {
+                                       goto Exit;
+                               }
+
+                               err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
+                               if (err != noErr) {
+                                       goto Exit;
+                               }
+                               buffer = currCache;
+                               wordsLeft = wordsPerBlock;
+                       }
+                       currentWord = SWAP_BE32 (*buffer);
+               }
+       }
+
+Exit:
+       if (currCache) {
+               /* Release the bitmap reference prior to marking bits in-use */
+               (void) ReleaseBitmapBlock(vcb, blockRef, false);
+               currCache = NULL;
+       }
+
+       if (err == noErr) {
+               *actualNumBlocks = block - *actualStartBlock;
+       
+               // sanity check
+               if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
+                       panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
+               }
+       }
+       else {
+               *actualStartBlock = 0;
+               *actualNumBlocks = 0;
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+       return err;
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine:       BlockFindKnown
+
+Function:   Return a potential extent from the free extent cache.  The
+                   returned extent *must* be marked allocated and removed
+                   from the cache by the *caller*.
+
+Inputs:
+       vcb                             Pointer to volume where space is to be allocated
+       maxBlocks               Maximum number of contiguous blocks to allocate
+
+Outputs:
+       actualStartBlock        First block of range allocated, or 0 if error
+       actualNumBlocks         Number of blocks allocated, or 0 if error
+
+Returns:
+       dskFulErr               Free extent cache is empty
+_______________________________________________________________________
+*/
+
+static OSErr BlockFindKnown(
+               ExtendedVCB             *vcb,
+               u_int32_t               maxBlocks,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks)
+{
+       OSErr                   err;    
+       u_int32_t               foundBlocks;
+       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
+
+       hfs_lock_mount (hfsmp);
+       lck_spin_lock(&vcb->vcbFreeExtLock);
+       if ( vcb->vcbFreeExtCnt == 0 || 
+                       vcb->vcbFreeExt[0].blockCount == 0) {
+               lck_spin_unlock(&vcb->vcbFreeExtLock);
+               hfs_unlock_mount(hfsmp);
+               if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+                       KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
+               return dskFulErr;
+       }
+       lck_spin_unlock(&vcb->vcbFreeExtLock);
+       hfs_unlock_mount(hfsmp);
+
+       lck_spin_lock(&vcb->vcbFreeExtLock);
+
+       //      Just grab up to maxBlocks of the first (largest) free exent.
+       *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
+       foundBlocks = vcb->vcbFreeExt[0].blockCount;
+       if (foundBlocks > maxBlocks)
+               foundBlocks = maxBlocks;
+       *actualNumBlocks = foundBlocks;
+
+       lck_spin_unlock(&vcb->vcbFreeExtLock);
+
+       // sanity check
+       if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) 
+       {
+               printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
+               hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
+               err = EIO;
+       } else
+               err = 0;
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+       return err;
+}
+
+/*
+ * BlockMarkAllocated
+ * 
+ * This is a wrapper function around the internal calls which will actually mark the blocks
+ * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do 
+ * this logic here to avoid callers having to deal with whether or not the red-black tree
+ * is enabled.
+ */
+
+OSErr BlockMarkAllocated(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               register u_int32_t      numBlocks)
+{
+       struct hfsmount *hfsmp;
+
+       hfsmp = VCBTOHFS(vcb);
+
+       return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
+
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine:       BlockMarkAllocatedInternal
+
+Function:      Mark a contiguous group of blocks as allocated (set in the
+                       bitmap).  It assumes those bits are currently marked
+                       deallocated (clear in the bitmap).  Note that this function
+                       must be called regardless of whether or not the bitmap or
+                       tree-based allocator is used, as all allocations must correctly
+                       be marked on-disk.  If the tree-based approach is running, then
+                       this will be done before the node is removed from the tree.
+
+Inputs:
+       vcb                             Pointer to volume where space is to be allocated
+       startingBlock   First block number to mark as allocated
+       numBlocks               Number of blocks to mark as allocated
+_______________________________________________________________________
+*/
+static 
+OSErr BlockMarkAllocatedInternal (
+                                                                 ExtendedVCB           *vcb,
+                                                                 u_int32_t             startingBlock,
+                                                                 u_int32_t     numBlocks,
+                                                                 hfs_block_alloc_flags_t flags)
+{
+       OSErr                   err;
+       register u_int32_t      *currentWord;   //      Pointer to current word within bitmap block
+       register u_int32_t      wordsLeft;              //      Number of words left in this bitmap block
+       register u_int32_t      bitMask;                //      Word with given bits already set (ready to OR in)
+       u_int32_t               firstBit;               //      Bit index within word of first bit to allocate
+       u_int32_t               numBits;                //      Number of bits in word to allocate
+       u_int32_t               *buffer = NULL;
+       uintptr_t  blockRef = 0;
+       u_int32_t  bitsPerBlock;
+       u_int32_t  wordsPerBlock;
+       // XXXdbg
+       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0);
+
+#if DEBUG
+
+       struct rl_entry *range;
+       TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
+               assert(rl_overlap(range, startingBlock,
+                                                 startingBlock + numBlocks - 1) == RL_NOOVERLAP);
+       }
+
+#endif
+
+       int force_flush = 0;
+       /*
+        * Since we are about to mark these bits as in-use 
+        * in the bitmap, decide if we need to alert the caller
+        * that a journal flush might be appropriate. It's safe to 
+        * poke at the journal pointer here since we MUST have 
+        * called start_transaction by the time this function is invoked.  
+        * If the journal is enabled, then it will have taken the requisite 
+        * journal locks.  If it is not enabled, then we have taken 
+        * a shared lock on the global lock.
+        */
+       if (hfsmp->jnl) {
+               uint32_t ignore;
+               err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
+               if ((err == 0) && (force_flush)) {
+                       journal_request_immediate_flush (hfsmp->jnl);           
+               }
+       }
+
+       hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
+
+       /*
+        * Don't make changes to the disk if we're just reserving.  Note that
+        * we could do better in the tentative case because we could, in theory,
+        * avoid the journal flush above.  However, that would mean that we would
+        * need to catch the callback to stop it incorrectly addding the extent
+        * to our free cache.
+        */
+       if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
+               err = 0;
+               goto Exit;
+       }
+
+       //
+       //      Pre-read the bitmap block containing the first word of allocation
+       //
+
+       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                 HFS_ALLOC_IGNORE_RESERVED);
+       if (err != noErr) goto Exit;
+       //
+       //      Initialize currentWord, and wordsLeft.
+       //
+       {
+               u_int32_t wordIndexInBlock;
+
+               bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
+               wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+
+               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+               currentWord = buffer + wordIndexInBlock;
+               wordsLeft = wordsPerBlock - wordIndexInBlock;
+       }
+
+       // XXXdbg
+       if (hfsmp->jnl) {
+               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+       }
+
+       //
+       //      If the first block to allocate doesn't start on a word
+       //      boundary in the bitmap, then treat that first word
+       //      specially.
+       //
+
+       firstBit = startingBlock % kBitsPerWord;
+       if (firstBit != 0) {
+               bitMask = kAllBitsSetInWord >> firstBit;        //      turn off all bits before firstBit
+               numBits = kBitsPerWord - firstBit;                      //      number of remaining bits in this word
+               if (numBits > numBlocks) {
+                       numBits = numBlocks;                                    //      entire allocation is inside this one word
+                       bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));        //      turn off bits after last
+               }
+#if DEBUG
+               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+                       panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
+               }
+#endif
+               *currentWord |= SWAP_BE32 (bitMask);            //      set the bits in the bitmap
+               numBlocks -= numBits;                                           //      adjust number of blocks left to allocate
+
+               ++currentWord;                                                          //      move to next word
+               --wordsLeft;                                                            //      one less word left in this block
+       }
+
+       //
+       //      Allocate whole words (32 blocks) at a time.
+       //
+
+       bitMask = kAllBitsSetInWord;                                    //      put this in a register for 68K
+       while (numBlocks >= kBitsPerWord) {
+               if (wordsLeft == 0) {
+                       //      Read in the next bitmap block
+                       startingBlock += bitsPerBlock;                  //      generate a block number in the next bitmap block
+
+                       buffer = NULL;
+                       err = ReleaseBitmapBlock(vcb, blockRef, true);
+                       if (err != noErr) goto Exit;
+
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
+                       if (err != noErr) goto Exit;
+
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+                       }
+
+                       //      Readjust currentWord and wordsLeft
+                       currentWord = buffer;
+                       wordsLeft = wordsPerBlock;
+               }
+#if DEBUG
+               if (*currentWord != 0) {
+                       panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
+               }
+#endif
+               *currentWord = SWAP_BE32 (bitMask);
+               numBlocks -= kBitsPerWord;
+
+               ++currentWord;                                                          //      move to next word
+               --wordsLeft;                                                            //      one less word left in this block
+       }
+
+       //
+       //      Allocate any remaining blocks.
+       //
+
+       if (numBlocks != 0) {
+               bitMask = ~(kAllBitsSetInWord >> numBlocks);    //      set first numBlocks bits
+               if (wordsLeft == 0) {
+                       //      Read in the next bitmap block
+                       startingBlock += bitsPerBlock;                          //      generate a block number in the next bitmap block
+
+                       buffer = NULL;
+                       err = ReleaseBitmapBlock(vcb, blockRef, true);
+                       if (err != noErr) goto Exit;
+
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
+                       if (err != noErr) goto Exit;
+
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+                       }
+
+                       //      Readjust currentWord and wordsLeft
+                       currentWord = buffer;
+                       wordsLeft = wordsPerBlock;
+               }
+#if DEBUG
+               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+                       panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
+               }
+#endif
+               *currentWord |= SWAP_BE32 (bitMask);                    //      set the bits in the bitmap
+
+               //      No need to update currentWord or wordsLeft
+       }
+
+Exit:
+
+       if (buffer)
+               (void)ReleaseBitmapBlock(vcb, blockRef, true);
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+       return err;
+}
+
+
+/*
+ * BlockMarkFree
+ * 
+ * This is a wrapper function around the internal calls which will actually mark the blocks
+ * as freed.  It will mark the blocks in the red-black tree if appropriate.  We need to do 
+ * this logic here to avoid callers having to deal with whether or not the red-black tree
+ * is enabled.
+ *
+ */
+OSErr BlockMarkFree(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               register u_int32_t      numBlocks)
+{
+       struct hfsmount *hfsmp;
+       hfsmp = VCBTOHFS(vcb);
+
+       return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
+}
+
+
+/*
+ * BlockMarkFreeUnused
+ * 
+ * Scan the bitmap block beyond end of current file system for bits 
+ * that are marked as used.  If any of the bits are marked as used,
+ * this function marks them free.
+ *
+ * Note:  This was specifically written to mark all bits beyond 
+ * end of current file system during hfs_extendfs(), which makes
+ * sure that all the new blocks added to the file system are 
+ * marked as free.   We expect that all the blocks beyond end of
+ * current file system are always marked as free, but there might 
+ * be cases where are marked as used.  This function assumes that 
+ * the number of blocks marked as used incorrectly are relatively
+ * small, otherwise this can overflow journal transaction size
+ * on certain file system configurations (example, large unused 
+ * bitmap with relatively small journal). 
+ *
+ * Input:
+ *     startingBlock: First block of the range to mark unused
+ *     numBlocks: Number of blocks in the range to mark unused
+ *
+ * Returns: zero on success, non-zero on error.
+ */
+OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t        numBlocks)
+{
+       int error = 0;
+       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       u_int32_t curNumBlocks;
+       u_int32_t  bitsPerBlock;
+       u_int32_t lastBit;
+
+       /* Use the optimal bitmap I/O size instead of bitmap block size */
+       bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+
+       /* 
+        * First clear any non bitmap allocation block aligned bits
+        *
+        * Calculate the first bit in the bitmap block next to 
+        * the bitmap block containing the bit for startingBlock.
+        * Using this value, we calculate the total number of 
+        * bits to be marked unused from startingBlock to the 
+        * end of bitmap block containing startingBlock. 
+        */
+       lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
+       curNumBlocks = lastBit - startingBlock;
+       if (curNumBlocks > numBlocks) {
+               curNumBlocks = numBlocks;
+       }
+       error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
+       if (error) {
+               return error;
+       }
+       startingBlock += curNumBlocks;
+       numBlocks -= curNumBlocks;
+
+       /* 
+        * Check a full bitmap block for any 'used' bit.  If any bit is used,
+        * mark all the bits only in that bitmap block as free.  This ensures
+        * that we do not write unmodified bitmap blocks and do not 
+        * overwhelm the journal. 
+        *
+        * The code starts by checking full bitmap block at a time, and 
+        * marks entire bitmap block as free only if any bit in that bitmap 
+        * block is marked as used.  In the end, it handles the last bitmap 
+        * block which might be partially full by only checking till the 
+        * caller-specified last bit and if any bit is set, only mark that 
+        * range as free.
+        */
+       while (numBlocks) {
+               if (numBlocks >= bitsPerBlock) {
+                       curNumBlocks = bitsPerBlock;
+               } else {
+                       curNumBlocks = numBlocks;
+               }
+               if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
+                       error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
+                       if (error) {
+                               return error;
+                       }
+               }
+               startingBlock += curNumBlocks;
+               numBlocks -= curNumBlocks;
+       }
+
+       return error;
+}
+
+/*
+_______________________________________________________________________
+
+Routine:       BlockMarkFreeInternal
+
+Function:      Mark a contiguous group of blocks as free (clear in the
+                       bitmap).  It assumes those bits are currently marked
+                       allocated (set in the bitmap).
+
+Inputs:
+       vcb                             Pointer to volume where space is to be freed
+       startingBlock   First block number to mark as freed
+       numBlocks               Number of blocks to mark as freed
+       do_validate     If true, validate that the blocks being 
+                                       deallocated to check if they are within totalBlocks
+                                       for current volume and whether they were allocated
+                                       before they are marked free.
+_______________________________________________________________________
+*/
+static 
+OSErr BlockMarkFreeInternal(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock_in,
+               register u_int32_t      numBlocks_in,
+               Boolean                 do_validate)
+{
+       OSErr           err;
+       u_int32_t       startingBlock = startingBlock_in;
+       u_int32_t       numBlocks = numBlocks_in;
+       uint32_t        unmapStart = startingBlock_in;
+       uint32_t        unmapCount = numBlocks_in;
+       uint32_t        wordIndexInBlock;
+       u_int32_t       *currentWord;   //      Pointer to current word within bitmap block
+       u_int32_t       wordsLeft;              //      Number of words left in this bitmap block
+       u_int32_t       bitMask;                //      Word with given bits already set (ready to OR in)
+       u_int32_t       currentBit;             //      Bit index within word of current bit to allocate
+       u_int32_t       numBits;                //      Number of bits in word to allocate
+       u_int32_t       *buffer = NULL;
+       uintptr_t       blockRef = 0;
+       u_int32_t       bitsPerBlock;
+       u_int32_t       wordsPerBlock;
+       // XXXdbg
+       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
+
+       /*
+        * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
+        * need to be able to free blocks being relocated during hfs_truncatefs.
+        */
+       if ((do_validate == true) && 
+                       (startingBlock + numBlocks > vcb->totalBlocks)) {
+#if ALLOC_DEBUG || DEBUG
+               panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
+               __builtin_unreachable();
+#else
+               printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
+               hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
+               err = EIO;
+               goto Exit;
+#endif
+       }
+
+       //
+       //      Pre-read the bitmap block containing the first word of allocation
+       //
+
+       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, 
+                                                 HFS_ALLOC_IGNORE_RESERVED);
+       if (err != noErr) goto Exit;
+       // XXXdbg
+       if (hfsmp->jnl) {
+               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+       }
+
+       uint32_t min_unmap = 0, max_unmap = UINT32_MAX;
+
+       // Work out the bounds of any unmap we can send down
+       struct rl_entry *range;
+       for (int i = 0; i < 2; ++i) {
+               TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+                       if (range->rl_start < startingBlock
+                               && range->rl_end >= min_unmap) {
+                               min_unmap = range->rl_end + 1;
+                       }
+                       if (range->rl_end >= startingBlock + numBlocks
+                               && range->rl_start < max_unmap) {
+                               max_unmap = range->rl_start;
+                       }
+               }
+       }
+
+       //
+       //      Figure out how many bits and words per bitmap block.
+       //
+       bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
+       wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+       wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+
+       //
+       // Look for a range of free blocks immediately before startingBlock
+       // (up to the start of the current bitmap block).  Set unmapStart to
+       // the first free block.
+       //
+       currentWord = buffer + wordIndexInBlock;
+       currentBit = startingBlock % kBitsPerWord;
+       bitMask = kHighBitInWordMask >> currentBit;
+       while (unmapStart > min_unmap) {
+               // Move currentWord/bitMask back by one bit
+               bitMask <<= 1;
+               if (bitMask == 0) {
+                       if (--currentWord < buffer)
+                               break;
+                       bitMask = kLowBitInWordMask;
+               }
+
+               if (*currentWord & SWAP_BE32(bitMask))
+                       break;  // Found an allocated block.  Stop searching.
+               --unmapStart;
+               ++unmapCount;
+       }
+
+       //
+       //      If the first block to free doesn't start on a word
+       //      boundary in the bitmap, then treat that first word
+       //      specially.
+       //
+
+       currentWord = buffer + wordIndexInBlock;
+       wordsLeft = wordsPerBlock - wordIndexInBlock;
+       currentBit = startingBlock % kBitsPerWord;
+       if (currentBit != 0) {
+               bitMask = kAllBitsSetInWord >> currentBit;      //      turn off all bits before currentBit
+               numBits = kBitsPerWord - currentBit;            //      number of remaining bits in this word
+               if (numBits > numBlocks) {
+                       numBits = numBlocks;                                    //      entire allocation is inside this one word
+                       bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));      //      turn off bits after last
+               }
+               if ((do_validate == true) && 
+                               (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
+                       goto Corruption;
+               }
+               *currentWord &= SWAP_BE32 (~bitMask);           //      clear the bits in the bitmap
+               numBlocks -= numBits;                                           //      adjust number of blocks left to free
+
+               ++currentWord;                                                          //      move to next word
+               --wordsLeft;                                                            //      one less word left in this block
+       }
+
+       //
+       //      Free whole words (32 blocks) at a time.
+       //
+
+       while (numBlocks >= kBitsPerWord) {
+               if (wordsLeft == 0) {
                        //      Read in the next bitmap block
                        startingBlock += bitsPerBlock;                  //      generate a block number in the next bitmap block
-                       
+
+                       buffer = NULL;
+                       err = ReleaseBitmapBlock(vcb, blockRef, true);
+                       if (err != noErr) goto Exit;
+
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
+                       if (err != noErr) goto Exit;
+
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+                       }
+
+                       //      Readjust currentWord and wordsLeft
+                       currentWord = buffer;
+                       wordsLeft = wordsPerBlock;
+               }
+               if ((do_validate == true) && 
+                               (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
+                       goto Corruption;
+               }
+               *currentWord = 0;                                                       //      clear the entire word
+               numBlocks -= kBitsPerWord;
+
+               ++currentWord;                                                          //      move to next word
+               --wordsLeft;                                                                    //      one less word left in this block
+       }
+
+       //
+       //      Free any remaining blocks.
+       //
+
+       if (numBlocks != 0) {
+               bitMask = ~(kAllBitsSetInWord >> numBlocks);    //      set first numBlocks bits
+               if (wordsLeft == 0) {
+                       //      Read in the next bitmap block
+                       startingBlock += bitsPerBlock;                          //      generate a block number in the next bitmap block
+
+                       buffer = NULL;
+                       err = ReleaseBitmapBlock(vcb, blockRef, true);
+                       if (err != noErr) goto Exit;
+
+                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, 
+                                                                 HFS_ALLOC_IGNORE_RESERVED);
+                       if (err != noErr) goto Exit;
+
+                       // XXXdbg
+                       if (hfsmp->jnl) {
+                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+                       }
+
+                       //      Readjust currentWord and wordsLeft
+                       currentWord = buffer;
+                       wordsLeft = wordsPerBlock;
+               }
+               if ((do_validate == true) && 
+                               (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
+                       goto Corruption;
+               }
+               *currentWord &= SWAP_BE32 (~bitMask);                   //      clear the bits in the bitmap
+
+               //      No need to update currentWord or wordsLeft
+       }
+
+       //
+       // Look for a range of free blocks immediately after the range we just freed
+       // (up to the end of the current bitmap block).
+       //
+       wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
+       wordsLeft = wordsPerBlock - wordIndexInBlock;
+       currentWord = buffer + wordIndexInBlock;
+       currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
+       bitMask = kHighBitInWordMask >> currentBit;
+       while (unmapStart + unmapCount < max_unmap) {
+               // Move currentWord/bitMask/wordsLeft forward one bit
+               bitMask >>= 1;
+               if (bitMask == 0) {
+                       if (--wordsLeft == 0)
+                               break;
+                       ++currentWord;
+                       bitMask = kHighBitInWordMask;
+               }
+
+               if (*currentWord & SWAP_BE32(bitMask))
+                       break;  // Found an allocated block.  Stop searching.
+               ++unmapCount;
+       }
+
+Exit:
+
+       if (buffer)
+               (void)ReleaseBitmapBlock(vcb, blockRef, true);
+
+       if (err == noErr) {
+               hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
+       }
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
+
+       return err;
+
+Corruption:
+#if DEBUG
+       panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
+       __builtin_unreachable();
+#else
+       printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
+                       vcb->vcbVN, startingBlock_in, numBlocks_in);
+       hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
+       err = EIO;
+       goto Exit;
+#endif
+}
+
+
+/*
+_______________________________________________________________________
+
+Routine:       BlockFindContiguous
+
+Function:      Find a contiguous range of blocks that are free (bits
+                       clear in the bitmap).  If a contiguous range of the
+                       minimum size can't be found, an error will be returned.
+                       This is only needed to support the bitmap-scanning logic,
+                       as the red-black tree should be able to do this by internally
+                       searching its tree.
+
+Inputs:
+       vcb                             Pointer to volume where space is to be allocated
+       startingBlock   Preferred first block of range
+       endingBlock             Last possible block in range + 1
+       minBlocks               Minimum number of blocks needed.  Must be > 0.
+       maxBlocks               Maximum (ideal) number of blocks desired
+       useMetaZone     OK to dip into metadata allocation zone
+
+Outputs:
+       actualStartBlock        First block of range found, or 0 if error
+       actualNumBlocks         Number of blocks found, or 0 if error
+
+Returns:
+       noErr                   Found at least minBlocks contiguous
+       dskFulErr               No contiguous space found, or all less than minBlocks
+_______________________________________________________________________
+*/
+
+static OSErr BlockFindContiguous(
+               ExtendedVCB             *vcb,
+               u_int32_t               startingBlock,
+               u_int32_t               endingBlock,
+               u_int32_t               minBlocks,
+               u_int32_t               maxBlocks,
+               Boolean                 useMetaZone,
+               Boolean                 trustSummary,
+               u_int32_t               *actualStartBlock,
+               u_int32_t               *actualNumBlocks,
+               hfs_block_alloc_flags_t flags)
+{
+       OSErr                   err;
+       register u_int32_t      currentBlock;           //      Block we're currently looking at.
+       u_int32_t                       firstBlock;                     //      First free block in current extent.
+       u_int32_t                       stopBlock;                      //      If we get to this block, stop searching for first free block.
+       u_int32_t                       foundBlocks;            //      Number of contiguous free blocks in current extent.
+       u_int32_t                       *buffer = NULL;
+       register u_int32_t      *currentWord;
+       register u_int32_t      bitMask;
+       register u_int32_t      wordsLeft;
+       register u_int32_t      tempWord;
+       uintptr_t  blockRef = 0;
+       u_int32_t  wordsPerBlock;
+       u_int32_t  updated_free_extent = 0;
+       struct hfsmount *hfsmp = (struct hfsmount*) vcb;
+       HFSPlusExtentDescriptor best = { 0, 0 };
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
+
+       /*
+        * When we're skipping the metadata zone and the start/end
+        * range overlaps with the metadata zone then adjust the 
+        * start to be outside of the metadata zone.  If the range
+        * is entirely inside the metadata zone then we can deny the
+        * request (dskFulErr).
+        */
+       if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
+               if (startingBlock <= vcb->hfs_metazone_end) {
+                       if (endingBlock > (vcb->hfs_metazone_end + 2))
+                               startingBlock = vcb->hfs_metazone_end + 1;
+                       else
+                               goto DiskFull;
+               }
+       }
+
+       if ((endingBlock - startingBlock) < minBlocks)
+       {
+               //      The set of blocks we're checking is smaller than the minimum number
+               //      of blocks, so we couldn't possibly find a good range.
+               goto DiskFull;
+       }
+
+       stopBlock = endingBlock - minBlocks + 1;
+       currentBlock = startingBlock;
+       firstBlock = 0;
+
+       /*
+        * Skip over metadata blocks.
+        */
+       if (!useMetaZone)
+               currentBlock = NextBitmapBlock(vcb, currentBlock);
+
+       /*
+        * Use the summary table if we can.  Skip over any totally
+        * allocated blocks.  currentBlock should now point to the first
+        * block beyond the metadata zone if the metazone allocations are not
+        * allowed in this invocation.
+        */
+       if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
+               uint32_t suggestion;
+               err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
+               if (err && err != ENOSPC)
+                       goto ErrorExit;
+               if (err == ENOSPC || suggestion >= stopBlock)
+                       goto DiskFull;
+               currentBlock = suggestion;
+       }
+
+
+       //
+       //      Pre-read the first bitmap block.
+       //
+       err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
+       if ( err != noErr ) goto ErrorExit;
+
+       //
+       //      Figure out where currentBlock is within the buffer.
+       //
+       wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+
+       wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);  // Current index into buffer
+       currentWord = buffer + wordsLeft;
+       wordsLeft = wordsPerBlock - wordsLeft;
+
+       uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
+                                                 - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
+                                                        ? 0 : hfsmp->tentativeBlocks));
+
+       /*
+        * This outer do-while loop is the main body of this function.  Its job is 
+        * to search through the blocks (until we hit 'stopBlock'), and iterate
+        * through swaths of allocated bitmap until it finds free regions.
+        */
+
+       do
+       {
+               foundBlocks = 0;
+               /*
+                * We will try and update the summary table as we search
+                * below.  Note that we will never update the summary table
+                * for the first and last blocks that the summary table
+                * covers.  Ideally, we should, but the benefits probably
+                * aren't that significant so we leave things alone for now.
+                */
+               uint32_t summary_block_scan = 0;
+               /*
+                * Inner while loop 1:
+                *              Look for free blocks, skipping over allocated ones.
+                *
+                * Initialization starts with checking the initial partial word
+                * if applicable.
+                */
+               bitMask = currentBlock & kBitsWithinWordMask;
+               if (bitMask)
+               {                       
+                       tempWord = SWAP_BE32(*currentWord);                     //      Fetch the current word only once
+                       bitMask = kHighBitInWordMask >> bitMask;
+                       while (tempWord & bitMask)
+                       {
+                               bitMask >>= 1;
+                               ++currentBlock;
+                       }
+
+                       //      Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)? 
+                       if (bitMask)
+                               goto FoundUnused;
+
+                       //      Didn't find any unused bits, so we're done with this word.
+                       ++currentWord;
+                       --wordsLeft;
+               }
+
+               //
+               //      Check whole words
+               //
+               while (currentBlock < stopBlock)
+               {
+                       //      See if it's time to read another block.
+                       if (wordsLeft == 0)
+                       {
+                               buffer = NULL;
+                               if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                                       /*
+                                        * If summary_block_scan is non-zero, then we must have
+                                        * pulled a bitmap file block into core, and scanned through
+                                        * the entire thing.  Because we're in this loop, we are 
+                                        * implicitly trusting that the bitmap didn't have any knowledge
+                                        * about this particular block.  As a result, update the bitmap
+                                        * (lazily, now that we've scanned it) with our findings that 
+                                        * this particular block is completely used up.
+                                        */
+                                       if (summary_block_scan != 0) {
+                                               uint32_t summary_bit;
+                                               (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
+                                               hfs_set_summary (hfsmp, summary_bit, 1);
+                                               summary_block_scan = 0;
+                                       }
+                               }
+                               err = ReleaseBitmapBlock(vcb, blockRef, false);
+                               if (err != noErr) goto ErrorExit;
+
+                               /*
+                                * Skip over metadata blocks.
+                                */
+                               if (!useMetaZone) {
+                                       currentBlock = NextBitmapBlock(vcb, currentBlock);
+                                       if (currentBlock >= stopBlock) {
+                                               goto LoopExit;
+                                       }
+                               }
+
+                               /* Skip over fully allocated bitmap blocks if we can */
+                               if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
+                                       uint32_t suggestion;
+                                       err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
+                                       if (err && err != ENOSPC)
+                                               goto ErrorExit;
+                                       if (err == ENOSPC || suggestion >= stopBlock)
+                                               goto LoopExit;
+                                       currentBlock = suggestion;
+                               }
+
+                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
+                               if ( err != noErr ) goto ErrorExit;
+
+                               /*
+                                * Set summary_block_scan to be the block we just read into the block cache.
+                                *
+                                * At this point, we've just read an allocation block worth of bitmap file
+                                * into the buffer above, but we don't know if it is completely allocated or not.
+                                * If we find that it is completely allocated/full then we will jump 
+                                * through this loop again and set the appropriate summary bit as fully allocated.
+                                */     
+                               summary_block_scan = currentBlock;
+                               currentWord = buffer;
+                               wordsLeft = wordsPerBlock;
+                       }
+
+                       //      See if any of the bits are clear
+                       if ((tempWord = SWAP_BE32(*currentWord)) + 1)   //      non-zero if any bits were clear
+                       {
+                               //      Figure out which bit is clear
+                               bitMask = kHighBitInWordMask;
+                               while (tempWord & bitMask)
+                               {
+                                       bitMask >>= 1;
+                                       ++currentBlock;
+                               }
+
+                               break;          //      Found the free bit; break out to FoundUnused.
+                       }
+
+                       //      Keep looking at the next word
+                       currentBlock += kBitsPerWord;
+                       ++currentWord;
+                       --wordsLeft;
+               }
+
+FoundUnused:
+               //      Make sure the unused bit is early enough to use
+               if (currentBlock >= stopBlock)
+               {
+                       break;
+               }
+
+               //      Remember the start of the extent
+               firstBlock = currentBlock;
+
+
+               /*
+                * Inner while loop 2:
+                *              We get here if we find a free block. Count the number
+                *              of contiguous free blocks observed.
+                * 
+                * Initialization starts with checking the initial partial word
+                * if applicable.
+                */
+               bitMask = currentBlock & kBitsWithinWordMask;
+               if (bitMask)
+               {
+                       tempWord = SWAP_BE32(*currentWord);                     //      Fetch the current word only once
+                       bitMask = kHighBitInWordMask >> bitMask;
+                       while (bitMask && !(tempWord & bitMask))
+                       {
+                               bitMask >>= 1;
+                               ++currentBlock;
+                       }
+
+                       //      Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)? 
+                       if (bitMask)
+                               goto FoundUsed;
+
+                       //      Didn't find any used bits, so we're done with this word.
+                       ++currentWord;
+                       --wordsLeft;
+               }
+
+               //
+               //      Check whole words
+               //
+               while (currentBlock < endingBlock)
+               {
+                       //      See if it's time to read another block.
+                       if (wordsLeft == 0)
+                       {
+                               buffer = NULL;
+                               err = ReleaseBitmapBlock(vcb, blockRef, false);
+                               if (err != noErr) goto ErrorExit;
+
+                               /*
+                                * Skip over metadata blocks.
+                                */
+                               if (!useMetaZone) {
+                                       u_int32_t nextBlock;
+
+                                       nextBlock = NextBitmapBlock(vcb, currentBlock);
+                                       if (nextBlock != currentBlock) {
+                                               goto LoopExit;  /* allocation gap, so stop */
+                                       }
+                               }
+
+                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
+                               if ( err != noErr ) goto ErrorExit;
+
+                               currentWord = buffer;
+                               wordsLeft = wordsPerBlock;
+                       }
+
+                       //      See if any of the bits are set
+                       if ((tempWord = SWAP_BE32(*currentWord)) != 0)
+                       {
+                               //      Figure out which bit is set
+                               bitMask = kHighBitInWordMask;
+                               while (!(tempWord & bitMask))
+                               {
+                                       bitMask >>= 1;
+                                       ++currentBlock;
+                               }
+
+                               break;          //      Found the used bit; break out to FoundUsed.
+                       }
+
+                       //      Keep looking at the next word
+                       currentBlock += kBitsPerWord;
+                       ++currentWord;
+                       --wordsLeft;
+
+                       //      If we found at least maxBlocks, we can quit early.
+                       if ((currentBlock - firstBlock) >= maxBlocks)
+                               break;
+               }
+
+FoundUsed:
+               //      Make sure we didn't run out of bitmap looking for a used block.
+               //      If so, pin to the end of the bitmap.
+               if (currentBlock > endingBlock)
+                       currentBlock = endingBlock;
+
+               //      Figure out how many contiguous free blocks there were.
+               //      Pin the answer to maxBlocks.
+               foundBlocks = currentBlock - firstBlock;
+               if (foundBlocks > maxBlocks)
+                       foundBlocks = maxBlocks;
+
+               if (remaining) {
+                       if (foundBlocks > remaining) {
+#if DEBUG || DEVELOPMENT
+                               printf("hfs: found more blocks than are indicated free!\n");
+#endif
+                               remaining = UINT32_MAX;
+                       } else
+                               remaining -= foundBlocks;
+               }
+
+               if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+                       if (foundBlocks > best.blockCount) {
+                               best.startBlock = firstBlock;
+                               best.blockCount = foundBlocks;
+                       }
+
+                       if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
+                               break;
+
+                       /*
+                        * Note that we will go ahead and add this free extent to our
+                        * cache below but that's OK because we'll remove it again if we
+                        * decide to use this extent.
+                        */
+               } else if (foundBlocks >= minBlocks)
+                       break;          //      Found what we needed!
+
+               /*
+                * We did not find the total blocks we were looking for, but
+                * add this free block run to our free extent cache list, if possible.
+                */
+
+               // If we're ignoring tentative ranges, we need to account for them here
+               if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
+                       struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
+                       struct rl_entry *range;;
+                       TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
+                               rl_subtract(&free_extent, range);
+                               if (rl_len(range) == 0)
+                                       break;
+                       }
+                       firstBlock = free_extent.rl_start;
+                       foundBlocks = rl_len(&free_extent);
+               }
+
+               if (foundBlocks) {
+                       if (hfsmp->jnl == NULL) {
+                               /* If there is no journal, go ahead and add to the free ext cache. */
+                               updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
+                       }
+                       else {
+                               /*
+                                * If journaled, only add to the free extent cache if this block is not
+                                * waiting for a TRIM to complete; that implies that the transaction that freed it
+                                * has not yet been committed to stable storage. 
+                                */
+                               int recently_deleted = 0;
+                               uint32_t nextblock;
+                               err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, 
+                                               (uint64_t)foundBlocks, &recently_deleted, &nextblock);
+                               if ((err) || (recently_deleted == 0))  {
+                                       /* if we hit an error, or the blocks not recently freed, go ahead and insert it */
+                                       updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
+                               }
+                               err = 0;
+                       }
+               }
+       } while (currentBlock < stopBlock);
+LoopExit:
+
+       if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
+               firstBlock = best.startBlock;
+               foundBlocks = best.blockCount;
+       }
+
+       //      Return the outputs.
+       if (foundBlocks < minBlocks)
+       {
+DiskFull:
+               err = dskFulErr;
+ErrorExit:
+               *actualStartBlock = 0;
+               *actualNumBlocks = 0;
+       }
+       else
+       {
+               err = noErr;
+               *actualStartBlock = firstBlock;
+               *actualNumBlocks = foundBlocks;
+               /*
+                * Sanity check for overflow
+                */
+               if ((firstBlock + foundBlocks) > vcb->allocLimit) {
+                       panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
+                                       vcb->vcbVN, startingBlock, endingBlock, currentBlock,
+                                       firstBlock, stopBlock, minBlocks, foundBlocks);
+               }
+       }
+
+       if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
+               int i;
+               u_int32_t min_start = vcb->totalBlocks;
+
+               // set the nextAllocation pointer to the smallest free block number
+               // we've seen so on the next mount we won't rescan unnecessarily
+               lck_spin_lock(&vcb->vcbFreeExtLock);
+               for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
+                       if (vcb->vcbFreeExt[i].startBlock < min_start) {
+                               min_start = vcb->vcbFreeExt[i].startBlock;
+                       }
+               }
+               lck_spin_unlock(&vcb->vcbFreeExtLock);
+               if (min_start != vcb->totalBlocks) {
+                       if (min_start < vcb->nextAllocation) {
+                               vcb->nextAllocation = min_start;
+                       }
+                       if (min_start < vcb->sparseAllocation) {
+                               vcb->sparseAllocation = min_start;
+                       }
+               }
+       }
+
+       if (buffer)
+               (void) ReleaseBitmapBlock(vcb, blockRef, false);
+
+       if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+       return err;
+}
+
+
+/* 
+ * Count number of bits set in the given 32-bit unsigned number 
+ *
+ * Returns:
+ *     Number of bits set
+ */
+static int num_bits_set(u_int32_t num) 
+{
+       int count;
+
+       for (count = 0; num; count++) {
+               num &= num - 1;
+       }
+
+       return count;
+}
+
+/* 
+ * For a given range of blocks, find the total number of blocks 
+ * allocated.  If 'stop_on_first' is true, it stops as soon as it 
+ * encounters the first allocated block.  This option is useful 
+ * to determine if any block is allocated or not. 
+ *
+ * Inputs:
+ *     startingBlock   First allocation block number of the range to be scanned.
+ *     numBlocks       Total number of blocks that need to be scanned.
+ *     stop_on_first   Stop the search after the first allocated block is found.
+ *
+ * Output:
+ *     allocCount      Total number of allocation blocks allocated in the given range.
+ *
+ *                     On error, it is the number of allocated blocks found 
+ *                     before the function got an error. 
+ *
+ *                     If 'stop_on_first' is set, 
+ *                             allocCount = 1 if any allocated block was found.
+ *                             allocCount = 0 if no allocated block was found.
+ *
+ * Returns:
+ *     0 on success, non-zero on failure. 
+ */
+static int 
+hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, 
+               u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
+{
+       u_int32_t  *currentWord;   // Pointer to current word within bitmap block
+       u_int32_t  wordsLeft;      // Number of words left in this bitmap block
+       u_int32_t  bitMask;        // Word with given bits already set (ready to test)
+       u_int32_t  firstBit;       // Bit index within word of first bit to allocate
+       u_int32_t  numBits;        // Number of bits in word to allocate
+       u_int32_t  *buffer = NULL;
+       uintptr_t  blockRef;
+       u_int32_t  bitsPerBlock;
+       u_int32_t  wordsPerBlock;
+       u_int32_t  blockCount = 0;
+       int  error;
+
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
+
+       /*
+        * Pre-read the bitmap block containing the first word of allocation
+        */
+       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, 
+                                                       HFS_ALLOC_IGNORE_TENTATIVE);
+       if (error)
+               goto JustReturn;
+
+       /*
+        * Initialize currentWord, and wordsLeft.
+        */
+       {
+               u_int32_t wordIndexInBlock;
+
+               bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+               wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+               currentWord = buffer + wordIndexInBlock;
+               wordsLeft = wordsPerBlock - wordIndexInBlock;
+       }
+
+       /*
+        * First test any non word aligned bits.
+        */
+       firstBit = startingBlock % kBitsPerWord;
+       if (firstBit != 0) {
+               bitMask = kAllBitsSetInWord >> firstBit;
+               numBits = kBitsPerWord - firstBit;
+               if (numBits > numBlocks) {
+                       numBits = numBlocks;
+                       bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+               }
+               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+                       if (stop_on_first) {
+                               blockCount = 1;
+                               goto Exit;
+                       }
+                       blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
+               }
+               numBlocks -= numBits;
+               ++currentWord;
+               --wordsLeft;
+       }
+
+       /*
+        * Test whole words (32 blocks) at a time.
+        */
+       while (numBlocks >= kBitsPerWord) {
+               if (wordsLeft == 0) {
+                       /* Read in the next bitmap block. */
+                       startingBlock += bitsPerBlock;
+
                        buffer = NULL;
-                       err = ReleaseBitmapBlock(vcb, blockRef, true);
-                       if (err != noErr) goto Exit;
+                       error = ReleaseBitmapBlock(hfsmp, blockRef, false);
+                       if (error) goto Exit;
+
+                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, 
+                                                                       HFS_ALLOC_IGNORE_TENTATIVE);
+                       if (error) goto Exit;
+
+                       /* Readjust currentWord and wordsLeft. */
+                       currentWord = buffer;
+                       wordsLeft = wordsPerBlock;
+               }
+               if (*currentWord != 0) {
+                       if (stop_on_first) {
+                               blockCount = 1;
+                               goto Exit;
+                       } 
+                       blockCount += num_bits_set(*currentWord);
+               }
+               numBlocks -= kBitsPerWord;
+               ++currentWord;
+               --wordsLeft;
+       }
+
+       /*
+        * Test any remaining blocks.
+        */
+       if (numBlocks != 0) {
+               bitMask = ~(kAllBitsSetInWord >> numBlocks);
+               if (wordsLeft == 0) {
+                       /* Read in the next bitmap block */
+                       startingBlock += bitsPerBlock;
+
+                       buffer = NULL;
+                       error = ReleaseBitmapBlock(hfsmp, blockRef, false);
+                       if (error) goto Exit;
+
+                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, 
+                                                                       HFS_ALLOC_IGNORE_TENTATIVE);
+                       if (error) goto Exit;
+
+                       currentWord = buffer;
+                       wordsLeft = wordsPerBlock;
+               }
+               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+                       if (stop_on_first) {
+                               blockCount = 1;
+                               goto Exit;
+                       }
+                       blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
+               }
+       }
+Exit:
+       if (buffer) {
+               (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+       }
+       if (allocCount) {
+               *allocCount = blockCount;
+       }
+
+JustReturn:
+       if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
+
+       return (error);
+}
+
+/* 
+ * Count total number of blocks that are allocated in the given 
+ * range from the bitmap.  This is used to preflight total blocks 
+ * that need to be relocated during volume resize.  
+ *
+ * The journal or allocation file lock must be held.
+ *
+ * Returns:
+ *     0 on success, non-zero on failure.  
+ *     On failure, allocCount is zero. 
+ */
+       int
+hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
+               u_int32_t numBlocks, u_int32_t *allocCount)
+{
+       return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
+}
+
+/*
+ * Test to see if any blocks in a range are allocated.
+ * 
+ * Note:  On error, this function returns 1, which means that 
+ * one or more blocks in the range are allocated.  This function 
+ * is primarily used for volume resize and we do not want 
+ * to report to the caller that the blocks are free when we 
+ * were not able to deterministically find it out.  So on error, 
+ * we always report that the blocks are allocated.  
+ *
+ * The journal or allocation file lock must be held.
+ *
+ * Returns 
+ *     0 if all blocks in the range are free.
+ *     1 if blocks in the range are allocated, or there was an error.
+ */
+       int 
+hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+       int error; 
+       u_int32_t allocCount;
+
+       error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
+       if (error) {
+               /* On error, we always say that the blocks are allocated 
+                * so that volume resize does not return false success.
+                */
+               return 1;
+       } else {
+               /* The function was deterministically able to find out 
+                * if there was any block allocated or not.  In that case,
+                * the value in allocCount is good enough to be returned 
+                * back to the caller.
+                */
+               return allocCount;
+       }
+} 
+
+/*
+ * CONFIG_HFS_RBTREE
+ * Check to see if the red-black tree is live.  Allocation file lock must be held
+ * shared or exclusive to call this function. Note that we may call this even if
+ * HFS is built without activating the red-black tree code.
+ */
+__private_extern__
+int 
+hfs_isrbtree_active(struct hfsmount *hfsmp){
+
+#pragma unused (hfsmp)
+
+       /* Just return 0 for now */
+       return 0;
+}
+
+
+
+/* Summary Table Functions */
+/*
+ * hfs_check_summary:
+ * 
+ * This function should be used to query the summary table to see if we can
+ * bypass a bitmap block or not when we're trying to find a free allocation block.
+ *
+ *
+ * Inputs:
+ *             allocblock - allocation block number. Will be used to infer the correct summary bit.
+ *             hfsmp -- filesystem in question.
+ * 
+ * Output Arg:
+ *             *freeblocks - set to 1 if we believe at least one free blocks in this vcbVBMIOSize
+ *             page of bitmap file.
+ * 
+ *
+ * Returns:
+ *             0 on success
+ *             EINVAL on error
+ *     
+ */
+
+static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
+
+       int err = EINVAL;
+       if (hfsmp->vcbVBMIOSize) {
+               if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                       uint32_t index;
+                       if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
+                               *freeblocks = 0;
+                               return EINVAL;
+                       }
+
+                       /* Ok, now that we have the bit index into the array, what byte is it in ? */
+                       uint32_t byteindex = index / kBitsPerByte;
+                       uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
+                       uint8_t bit_in_byte = index % kBitsPerByte;
+
+                       if (current_byte & (1 << bit_in_byte)) {
+                               /* 
+                                * We do not believe there is anything free in the
+                                * entire vcbVBMIOSize'd block.
+                                */
+                               *freeblocks = 0;
+                       }       
+                       else {
+                               /* Looks like there might be a free block here... */
+                               *freeblocks = 1;
+                       }
+               }
+               err = 0;
+       }
+
+       return err;
+}
+
+
+#if 0
+/*
+ * hfs_get_next_summary
+ *
+ * From a given allocation block, jump to the allocation block at the start of the
+ * next vcbVBMIOSize boundary.  This is useful when trying to quickly skip over
+ * large swaths of bitmap once we have determined that the bitmap is relatively full. 
+ *
+ * Inputs: hfsmount, starting allocation block number
+ * Output Arg: *newblock will contain the allocation block number to start
+ * querying.
+ * 
+ * Returns:
+ *             0 on success
+ *             EINVAL if the block argument is too large to be used, or the summary table not live.
+ *             EFBIG if there are no more summary bits to be queried
+ */
+static int 
+hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
+
+       u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+       u_int32_t start_offset;
+       u_int32_t next_offset;
+       int err = EINVAL; 
+
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
+                       return err;
+               }
+
+               next_offset = start_offset++;   
+
+               if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
+                       /* Can't jump to the next summary bit. */
+                       return EINVAL;
+               }
+
+               /* Otherwise, compute and return */
+               *newblock = next_offset * bits_per_iosize;
+               if (*newblock >= hfsmp->totalBlocks) {
+                       return EINVAL;
+               }
+               err = 0;
+       }
+
+       return err;
+}
+
+#endif
+
+/*
+ * hfs_release_summary 
+ * 
+ * Given an extent that is about to be de-allocated on-disk, determine the number
+ * of summary bitmap bits that need to be marked as 'potentially available'.
+ * Then go ahead and mark them as free.
+ *
+ *     Inputs:
+ *             hfsmp           - hfs mount
+ *             block           - starting allocation block.
+ *             length          - length of the extent.
+ * 
+ *     Returns:
+ *             EINVAL upon any errors.
+ */
+static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
+       int err = EINVAL;
+       uint32_t end_blk = (start_blk + length) - 1;
+
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               /* Figure out what the starting / ending block's summary bits are */
+               uint32_t start_bit;
+               uint32_t end_bit;
+               uint32_t current_bit;
+
+               err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
+               if (err) {
+                       goto release_err;
+               }
+               err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
+               if (err) {
+                       goto release_err;
+               }
+
+               if (ALLOC_DEBUG) {
+                       if (start_bit > end_bit) {
+                               panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
+                       }
+               }
+               current_bit = start_bit;
+               while (current_bit <= end_bit) {
+                       err = hfs_set_summary (hfsmp, current_bit, 0); 
+                       current_bit++;
+               }
+       }
+
+release_err:
+       return err;
+}
+
+/*
+ * hfs_find_summary_free
+ * 
+ * Given a allocation block as input, returns an allocation block number as output as a 
+ * suggestion for where to start scanning the bitmap in order to find free blocks.  It will
+ * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
+ * bit, then keep iterating over the summary bits in order to find the first free one.
+ * 
+ * Inputs:
+ *             hfsmp           - hfs mount
+ *             block           - starting allocation block
+ *             newblock        - output block as suggestion
+ * 
+ * Returns:
+ *             0 on success
+ *             ENOSPC if we could not find a free block 
+ */
+
+int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block,  uint32_t *newblock) {
+
+       int err = ENOSPC;
+       uint32_t bit_index = 0;
+       uint32_t maybe_has_blocks = 0;
+
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               uint32_t byte_index;
+               uint8_t curbyte;
+               uint8_t bit_in_byte;
+               uint32_t summary_cap;
+
+               /* 
+                * We generate a cap for the summary search because the summary table
+                * always represents a full summary of the bitmap FILE, which may
+                * be way more bits than are necessary for the actual filesystem 
+                * whose allocations are mapped by the bitmap.
+                * 
+                * Compute how much of hfs_summary_size is useable for the given number
+                * of allocation blocks eligible on this FS.
+                */
+               err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
+               if (err) {
+                       goto summary_exit;
+               }
+
+               /* Check the starting block first */
+               err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
+               if (err) {
+                       goto summary_exit;
+               }
+
+               if (maybe_has_blocks) {
+                       /* 
+                        * It looks like the initial start block could have something.  
+                        * Short-circuit and just use that.
+                        */
+                       *newblock = block;
+                       goto summary_exit;
+               }
+
+               /*
+                * OK, now we know that the first block was useless.  
+                * Get the starting summary bit, and find it in the array 
+                */
+               maybe_has_blocks = 0;
+               err = hfs_get_summary_index (hfsmp, block, &bit_index);
+               if (err) {
+                       goto summary_exit;
+               }
+
+               /* Iterate until we find something. */
+               while (bit_index <= summary_cap) {
+                       byte_index = bit_index / kBitsPerByte;
+                       curbyte = hfsmp->hfs_summary_table[byte_index];
+                       bit_in_byte = bit_index % kBitsPerByte;
+
+                       if (curbyte & (1 << bit_in_byte)) {
+                               /* nothing here.  increment and move on */
+                               bit_index++;
+                       }
+                       else {
+                               /* 
+                                * found something! convert bit_index back into 
+                                * an allocation block for use. 'newblock' will now
+                                * contain the proper allocation block # based on the bit
+                                * index.
+                                */
+                               err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);  
+                               if (err) {
+                                       goto summary_exit;
+                               }
+                               maybe_has_blocks = 1;
+                               break;
+                       }
+               }
+
+               /* If our loop didn't find anything, set err to ENOSPC */
+               if (maybe_has_blocks == 0) {
+                       err = ENOSPC;
+               }
+       }
+
+       /* If the summary table is not active for this mount, we'll just return ENOSPC */
+summary_exit:
+       if (maybe_has_blocks) {
+               err = 0;
+       }
+
+       return err;
+}
+
+/*
+ * hfs_get_summary_allocblock
+ * 
+ * Convert a summary bit into an allocation block number to use to start searching for free blocks.
+ * 
+ * Inputs:
+ *             hfsmp                   - hfs mount
+ *             summarybit              - summmary bit index 
+ *             *alloc                  - allocation block number in the bitmap file.
+ *
+ * Output:
+ *             0 on success
+ *             EINVAL on failure
+ */
+int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
+               summarybit, uint32_t *alloc) {
+       uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+       uint32_t allocblk;
+
+       allocblk = summarybit * bits_per_iosize;
+
+       if (allocblk >= hfsmp->totalBlocks) {
+               return EINVAL;
+       }
+       else {
+               *alloc = allocblk;
+       }
+
+       return 0;
+}
+
+
+/*
+ * hfs_set_summary:
+ * 
+ * This function should be used to manipulate the summary table 
+ *
+ * The argument 'inuse' will set the value of the bit in question to one or zero
+ * depending on its value.
+ *
+ * Inputs:
+ *             hfsmp           - hfs mount
+ *             summarybit      - the bit index into the summary table to set/unset.
+ *             inuse           - the value to assign to the bit.
+ *
+ * Returns:
+ *             0 on success
+ *             EINVAL on error
+ *     
+ */
+
+static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
+
+       int err = EINVAL;
+       if (hfsmp->vcbVBMIOSize) {
+               if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {     
+
+                       if (ALLOC_DEBUG) {
+                               if (hfsmp->hfs_summary_table == NULL) {
+                                       panic ("hfs_set_summary: no table for %p ", hfsmp);
+                               }
+                       }
+
+                       /* Ok, now that we have the bit index into the array, what byte is it in ? */
+                       uint32_t byte_index = summarybit / kBitsPerByte;
+                       uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
+                       uint8_t bit_in_byte = summarybit % kBitsPerByte;
+
+                       if (inuse) {
+                               current_byte = (current_byte | (1 << bit_in_byte));
+                       }
+                       else {
+                               current_byte = (current_byte & ~(1 << bit_in_byte));
+                       }
+
+                       hfsmp->hfs_summary_table[byte_index] = current_byte;
+               }
+               err = 0;
+       }
+
+       return err;
+}
+
+
+/*
+ * hfs_get_summary_index:
+ *
+ * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
+ * of IO against the bitmap file.
+ * 
+ * Returns:
+ *             0 on success
+ *             EINVAL on failure
+ */
+static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
+       uint32_t summary_bit;
+       uint32_t bits_per_iosize;
+       int err = EINVAL;
+
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               /* Is the input block bigger than the total number of blocks? */
+               if (block >= hfsmp->totalBlocks) {
+                       return EINVAL;
+               }
+
+               /* Is there even a vbmIOSize set? */
+               if (hfsmp->vcbVBMIOSize == 0) {
+                       return EINVAL;
+               }
+
+               bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+
+               summary_bit = block / bits_per_iosize;
+
+               *index = summary_bit;
+               err = 0;
+       }
+
+       return err;
+}
+
+/*
+ * hfs_init_summary
+ * 
+ * From a given mount structure, compute how big the summary table should be for the given
+ * filesystem, then allocate and bzero the memory.
+ *
+ * Returns:
+ * 0 on success
+ * EINVAL on failure
+ */
+int
+hfs_init_summary (struct hfsmount *hfsmp) {
+
+       uint32_t summary_size;  
+       uint32_t summary_size_bytes;
+       uint8_t *summary_table;
+
+       if (hfsmp->hfs_allocation_cp == NULL) {
+               if (ALLOC_DEBUG) {
+                       printf("hfs: summary table cannot progress without a bitmap cnode! \n");
+               }
+               return EINVAL;
+       }
+       /* 
+        * The practical maximum size of the summary table is 16KB:  
+        *
+        *              (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
+        * 
+        * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
+        * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
+        * supporting this much worse.  The math would instead look like this:
+        * (512MB / 2k) / 8 == 32k. 
+        * 
+        * So, we will disallow the summary table if the allocation block size is < 4k.
+        */
+
+       if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
+               printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
+               return EINVAL;
+       }
+
+       summary_size = hfsmp->hfs_allocation_cp->c_blocks;
+
+       if (ALLOC_DEBUG) {
+               printf("HFS Summary Table Initialization: Bitmap %u blocks\n", 
+                               hfsmp->hfs_allocation_cp->c_blocks);
+       }
+
+       /*
+        * If the bitmap IO size is not the same as the allocation block size then
+        * then re-compute the number of summary bits necessary.  Note that above, the 
+        * the default size is the number of allocation blocks in the bitmap *FILE* 
+        * (not the number of bits in the bitmap itself).  If the allocation block size
+        * is large enough though, we may need to increase this. 
+        */
+       if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
+               uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
+               lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
+
+               /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
+               summary_size = (uint32_t) lrg_size;
+       }
+
+       /* 
+        * If the block size is the same as the IO Size, then the total number of blocks
+        * is already equal to the number of IO units, which is our number of summary bits.
+        */
+
+       summary_size_bytes = summary_size / kBitsPerByte;
+       /* Always add one byte, just in case we have a dangling number of bits */
+       summary_size_bytes++;
+
+       if (ALLOC_DEBUG) {
+               printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size); 
+               printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes); 
+       }
+
+       /* Store the field in the mount point, and then MALLOC/bzero the memory */
+       hfsmp->hfs_summary_size = summary_size;
+       hfsmp->hfs_summary_bytes = summary_size_bytes;
+
+       MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK); 
+       if (summary_table == NULL) {
+               return ENOMEM;
+       }
+       bzero (summary_table, summary_size_bytes);
+
+       /* enable the summary table */
+       hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
+       hfsmp->hfs_summary_table = summary_table;
+
+       if (ALLOC_DEBUG) {
+               if (hfsmp->hfs_summary_table == NULL) {
+                       panic ("HFS Summary Init: no table for %p\n", hfsmp);
+               }
+       }
+       return 0;
+}
+
+/*
+ * hfs_rebuild_summary
+ *
+ * This function should be used to allocate a new hunk of memory for use as a summary
+ * table, then copy the existing data into it.  We use it whenever the filesystem's size
+ * changes.  When a resize is in progress, you can still use the extant summary
+ * table if it is active.
+ * 
+ * Inputs:
+ *             hfsmp           -- FS in question
+ *             newlength       -- new length of the FS in allocation blocks.
+ *
+ * Outputs: 
+ *             0 on success, EINVAL on failure.  If this function fails,  the summary table
+ *             will be disabled for future use.
+ *
+ */
+static int hfs_rebuild_summary (struct hfsmount *hfsmp) {
+
+       uint32_t new_summary_size;
+
+       new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;
+
+
+       if (ALLOC_DEBUG) {
+               printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
+       }
+
+       /* 
+        * If the bitmap IO size is not the same as the allocation block size, then re-compute
+        * the number of summary bits necessary.  Note that above, the default size is the number
+        * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
+        * If the allocation block size is large enough though, we may need to increase this, as 
+        * bitmap IO is capped at 64k per IO
+        */
+       if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
+               uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
+               lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
+
+               /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
+               new_summary_size = (uint32_t) lrg_size;
+       }
+
+       /* 
+        * Ok, we have the new summary bitmap theoretical max size.  See if it's the same as 
+        * what we've got already...
+        */
+       if (new_summary_size != hfsmp->hfs_summary_size) {
+               uint32_t summarybytes = new_summary_size / kBitsPerByte;
+               uint32_t copysize;
+               uint8_t *newtable;
+               /* Add one byte for slop */
+               summarybytes++;
+
+               if (ALLOC_DEBUG) {
+                       printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
+                       printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
+               }
+
+               /* Attempt to MALLOC the memory */
+               MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK);
+               if (newtable == NULL) {
+                       /* 
+                        * ERROR!  We need to disable the table now 
+                        */
+                       FREE (hfsmp->hfs_summary_table, M_TEMP);
+                       hfsmp->hfs_summary_table = NULL;
+                       hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; 
+                       return EINVAL;
+               }
+               bzero (newtable, summarybytes);
+
+               /* 
+                * The new table may be smaller than the old one. If this is true, then
+                * we can't copy the full size of the existing summary table into the new
+                * one. 
+                * 
+                * The converse is not an issue since we bzeroed the table above. 
+                */ 
+               copysize = hfsmp->hfs_summary_bytes;
+               if (summarybytes < hfsmp->hfs_summary_bytes) {  
+                       copysize = summarybytes;
+               }
+               memcpy (newtable, hfsmp->hfs_summary_table, copysize); 
+
+               /* We're all good.  Destroy the old copy and update ptrs */
+               FREE (hfsmp->hfs_summary_table, M_TEMP);
+
+               hfsmp->hfs_summary_table = newtable;
+               hfsmp->hfs_summary_size = new_summary_size;     
+               hfsmp->hfs_summary_bytes = summarybytes;
+       }
+
+       return 0;
+}
+
+
+#if ALLOC_DEBUG
+/* 
+ * hfs_validate_summary
+ * 
+ * Validation routine for the summary table.  Debug-only function.
+ * 
+ * Bitmap lock must be held.
+ *
+ */
+void hfs_validate_summary (struct hfsmount *hfsmp) {
+       uint32_t i;
+       int err;
+
+       /* 
+        * Iterate over all of the bits in the summary table, and verify if 
+        * there really are free blocks in the pages that we believe may
+        * may contain free blocks.
+        */
+
+       if (hfsmp->hfs_summary_table == NULL) {
+               panic ("HFS Summary: No HFS summary table!");
+       }       
+
+       /* 131072 bits == 16384 bytes.  This is the theoretical max size of the summary table. we add 1 byte for slop */
+       if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
+               panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
+       }
+
+       if (hfsmp->vcbVBMIOSize == 0) {
+               panic("HFS Summary: no VCB VBM IO Size !");
+       }
+
+       printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
+       printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);
+
+
+       /* iterate through all possible summary bits */
+       for (i = 0; i < hfsmp->hfs_summary_size ; i++) {
+
+               uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
+               uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;
+
+               /* Compute the corresponding allocation block for the summary bit. */
+               uint32_t alloc_block = i * bits_per_iosize;
+
+               /* 
+                * We use a uint32_t pointer here because it will speed up 
+                * access to the real bitmap data on disk. 
+                */
+               uint32_t *block_data;
+               struct buf *bp;
+               int counter;
+               int counter_max;
+               int saw_free_bits = 0;
+
+               /* Get the block */
+               if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data,  &bp))) {
+                       panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
+               }
+
+               /* Query the status of the bit and then make sure we match */
+               uint32_t maybe_has_free_blocks;
+               err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
+               if (err) {
+                       panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
+               }
+               counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+               for (counter = 0; counter < counter_max; counter++) {
+                       uint32_t word = block_data[counter];
+
+                       /* We assume that we'll not find any free bits here. */
+                       if (word != kAllBitsSetInWord) {
+                               if (maybe_has_free_blocks) {
+                                       /* All done */
+                                       saw_free_bits = 1;
+                                       break;
+                               }
+                               else {
+                                       panic ("HFS Summary: hfs_check_summary saw free bits!");
+                               }
+                       }
+               }
+
+               if (maybe_has_free_blocks && (saw_free_bits == 0)) {
+                       panic ("HFS Summary: did not see free bits !"); 
+               }
+
+               /* Release the block. */
+               if ((err =  ReleaseScanBitmapRange (bp))) {
+                       panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
+               }
+       }
+
+       printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);
+
+       return;
+}
+#endif
+
+/*
+ * hfs_alloc_scan_range:
+ *
+ * This function should be used to scan large ranges of the allocation bitmap
+ * at one time.  It makes two key assumptions:
+ * 
+ *             1) Bitmap lock is held during the duration of the call (exclusive)
+ *             2) There are no pages in the buffer cache for any of the bitmap 
+ *             blocks that we may encounter.  It *MUST* be completely empty.
+ * 
+ * The expected use case is when we are scanning the bitmap in full while we are 
+ * still mounting the filesystem in order to issue TRIMs or build up the summary 
+ * table for the mount point. It should be done after any potential journal replays
+ * are completed and their I/Os fully issued.
+ * 
+ * The key reason for assumption (2) above is that this function will try to issue 
+ * I/O against the bitmap file in chunks as large a possible -- essentially as 
+ * much as the buffer layer will handle (1MB).  Because the size of these I/Os 
+ * is larger than what would be expected during normal runtime we must invalidate 
+ * the buffers as soon as we are done with them so that they do not persist in 
+ * the buffer cache for other threads to find, as they'll typically be doing 
+ * allocation-block size I/Os instead.
+ * 
+ * Input Args:
+ *             hfsmp           - hfs mount data structure
+ *             startbit        - allocation block # to start our scan. It must be aligned
+ *                                     on a vcbVBMIOsize boundary.
+ *             list            - journal trim list data structure for issuing TRIMs
+ *
+ * Output Args:
+ *             bitToScan       - Return the next bit to scan if this function is called again. 
+ *                                     Caller will supply this into the next invocation
+ *                                     of this call as 'startbit'.     
+ */
+
+static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit, 
+               u_int32_t *bitToScan, struct jnl_trim_list *list) {
+
+       int error;
+       int readwrite = 1;
+       u_int32_t curAllocBlock;
+       struct buf *blockRef = NULL;
+       u_int32_t *buffer = NULL;
+       u_int32_t free_offset = 0; //tracks the start of the current free range
+       u_int32_t size = 0; // tracks the length of the current free range.
+       u_int32_t iosize = 0; //how much io we should generate against the bitmap
+       u_int32_t byte_off; // byte offset into the bitmap file.
+       u_int32_t completed_size; // how much io was actually completed
+       u_int32_t last_bitmap_block;
+       u_int32_t current_word; 
+       u_int32_t word_index = 0;       
+
+       /* summary table building */
+       uint32_t summary_bit = 0;
+       uint32_t saw_free_blocks = 0;
+       uint32_t last_marked = 0;
+
+       if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+               readwrite = 0;
+       }
+
+       /* 
+        * Compute how much I/O we should generate here.
+        * hfs_scan_range_size will validate that the start bit 
+        * converted into a byte offset into the bitmap file,
+        * is aligned on a VBMIOSize boundary. 
+        */
+       error = hfs_scan_range_size (hfsmp, startbit, &iosize);
+       if (error) {
+               if (ALLOC_DEBUG) {
+                       panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
+               }
+               return error;
+       }
+
+       if (iosize < hfsmp->vcbVBMIOSize) {
+               if (ALLOC_DEBUG) {
+                       panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
+               }
+               return EINVAL;
+       }
+
+       /* hfs_scan_range_size should have verified startbit.  Convert it to bytes */
+       byte_off = startbit / kBitsPerByte;
+
+       /*
+        * When the journal replays blocks, it does so by writing directly to the disk
+        * device (bypassing any filesystem vnodes and such).  When it finishes its I/Os
+        * it also immediately re-reads and invalidates the range covered by the bp so
+        * it does not leave anything lingering in the cache (for iosize reasons).  
+        * 
+        * As such, it is safe to do large I/Os here with ReadBitmapRange. 
+        *
+        * NOTE: It is not recommended, but it is possible to call the function below
+        * on sections of the bitmap that may be in core already as long as the pages are not
+        * dirty.  In that case, we'd notice that something starting at that
+        * logical block of the bitmap exists in the metadata cache, and we'd check 
+        * if the iosize requested is the same as what was already allocated for it.  
+        * Odds are pretty good we're going to request something larger.  In that case, 
+        * we just free the existing memory associated with the buf and reallocate a 
+        * larger range. This function should immediately invalidate it as soon as we're 
+        * done scanning, so this shouldn't cause any coherency issues.
+        */
+
+       error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
+       if (error) {
+               if (ALLOC_DEBUG) {
+                       panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
+               }
+               return error;
+       }
+
+       /* 
+        * At this point, we have a giant wired buffer that represents some portion of
+        * the bitmap file that we want to analyze.   We may not have gotten all 'iosize'
+        * bytes though, so clip our ending bit to what we actually read in.
+        */
+       completed_size = buf_count(blockRef);
+       last_bitmap_block = completed_size * kBitsPerByte;
+       last_bitmap_block = last_bitmap_block + startbit;
+
+       /* Cap the last block to the total number of blocks if required */
+       if (last_bitmap_block > hfsmp->totalBlocks) {
+               last_bitmap_block = hfsmp->totalBlocks;
+       }       
+
+       /* curAllocBlock represents the logical block we're analyzing. */
+       curAllocBlock = startbit;       
+       word_index = 0;
+       size = 0;
+
+       if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+               if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
+                       error = EINVAL;
+                       if (ALLOC_DEBUG) {
+                               panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
+                       }
+                       return error;
+               }
+               /* 
+                * summary_bit should now be set to the summary bit corresponding to
+                * the allocation block of the first bit that we're supposed to scan
+                */ 
+       }
+       saw_free_blocks = 0;
+
+       while (curAllocBlock < last_bitmap_block) {
+               u_int32_t bit;
+
+               /* Update the summary table as needed */
+               if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+                       if (ALLOC_DEBUG) {
+                               if (hfsmp->hfs_summary_table == NULL) {
+                                       panic ("hfs_alloc_scan_range: no summary table!");
+                               }
+                       }       
+
+                       uint32_t temp_summary;
+                       error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
+                       if (error) {
+                               if (ALLOC_DEBUG) {
+                                       panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
+                               }
+                               return EINVAL;
+                       }
+
+                       if (ALLOC_DEBUG) {
+                               if (temp_summary < summary_bit) {
+                                       panic ("hfs_alloc_scan_range: backwards summary bit?\n");
+                               }
+                       }
+
+                       /* 
+                        * If temp_summary is greater than summary_bit, then this
+                        * means that the next allocation block crosses a vcbVBMIOSize boundary
+                        * and we should treat this range of on-disk data as part of a new summary
+                        * bit.
+                        */ 
+                       if (temp_summary > summary_bit) {
+                               if (saw_free_blocks == 0) {
+                                       /* Mark the bit as totally consumed in the summary table */
+                                       hfs_set_summary (hfsmp, summary_bit, 1);
+                               }
+                               else {
+                                       /* Mark the bit as potentially free in summary table */
+                                       hfs_set_summary (hfsmp, summary_bit, 0);
+                               }
+                               last_marked = summary_bit;
+                               /* 
+                                * Any time we set the summary table, update our counter which tracks
+                                * what the last bit that was fully marked in the summary table. 
+                                *  
+                                * Then reset our marker which says we haven't seen a free bit yet.
+                                */
+                               saw_free_blocks = 0;
+                               summary_bit = temp_summary;
+                       }
+               } /* End summary table conditions */
+
+               current_word = SWAP_BE32(buffer[word_index]);
+               /* Iterate through the word 1 bit at a time... */
+               for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
+                       if (curAllocBlock >= last_bitmap_block) {
+                               break;
+                       }
+                       u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-                       if (err != noErr) goto Exit;
+                       if (allocated) { 
+                               if (size != 0) {
+                                       if (readwrite) {
+                                               /* Insert the previously tracked range of free blocks to the trim list */
+                                               hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
+                                       }
+                                       add_free_extent_cache (hfsmp, free_offset, size);
+                                       size = 0;
+                                       free_offset = 0;
+                               }
+                       }
+                       else {
+                               /* Not allocated */
+                               size++;
+                               if (free_offset == 0) {
+                                       /* Start a new run of free spcae at curAllocBlock */
+                                       free_offset = curAllocBlock;
+                               }
+                               if (saw_free_blocks == 0) {
+                                       saw_free_blocks = 1;
+                               }
+                       }
+               } /* end for loop iterating through the word */
 
-                       // XXXdbg
-                       if (hfsmp->jnl) {
-                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+               if (curAllocBlock < last_bitmap_block) {
+                       word_index++;
+               }
+
+       } /* End while loop (iterates through last_bitmap_block) */
+
+
+       /* 
+        * We've (potentially) completed our pass through this region of bitmap, 
+        * but one thing we may not have done is updated that last summary bit for 
+        * the last page we scanned, because we would have never transitioned across 
+        * a vcbVBMIOSize boundary again.  Check for that and update the last bit
+        * as needed.
+        * 
+        * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
+        * for the region of bitmap on-disk that we were scanning. (it is one greater).
+        */
+       if ((curAllocBlock >= last_bitmap_block) && 
+                       (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { 
+               uint32_t temp_summary;
+               /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
+               uint32_t temp_block = last_bitmap_block - 1;
+               error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
+               if (error) {
+                       if (ALLOC_DEBUG) {
+                               panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
                        }
+                       return EINVAL;
+               }
 
-                       //      Readjust currentWord and wordsLeft
-                       currentWord = buffer;
-                       wordsLeft = wordsPerBlock;
+               /* Did we already update this in the table? */
+               if (temp_summary > last_marked) {
+                       if (saw_free_blocks == 0) {
+                               hfs_set_summary (hfsmp, temp_summary, 1);
+                       }
+                       else {
+                               hfs_set_summary (hfsmp, temp_summary, 0);
+                       }
                }
-#if DEBUG_BUILD
-               if (*currentWord != 0) {
-                       panic("BlockMarkAllocated: blocks already allocated!");
+       }
+
+       /* 
+        * We may have been tracking a range of free blocks that hasn't been inserted yet. 
+        * Keep the logic for the TRIM and free extent separate from that of the summary 
+        * table management even though they are closely linked.
+        */
+       if (size != 0) {
+               if (readwrite) {
+                       hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
                }
-#endif
-               *currentWord = SWAP_BE32 (bitMask);
-               numBlocks -= kBitsPerWord;
+               add_free_extent_cache (hfsmp, free_offset, size);
+       }
 
-               ++currentWord;                                                          //      move to next word
-               --wordsLeft;                                                            //      one less word left in this block
+       /* 
+        * curAllocBlock represents the next block we need to scan when we return
+        * to this function. 
+        */
+       *bitToScan = curAllocBlock;
+       ReleaseScanBitmapRange(blockRef);
+
+       return 0;
+
+}
+
+
+
+/*
+ * Compute the maximum I/O size to generate against the bitmap file
+ * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap. 
+ * 
+ * Inputs:
+ *             hfsmp           -- hfsmount to look at 
+ *             bitmap_off      -- bit offset into the bitmap file
+ *     
+ * Outputs:
+ *             iosize  -- iosize to generate.
+ *
+ * Returns:
+ *             0 on success; EINVAL otherwise 
+ */
+static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
+
+       /* 
+        * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
+        * with 32 bit math in this function.
+        */
+
+       uint32_t bitmap_len;
+       uint32_t remaining_bitmap;
+       uint32_t target_iosize;
+       uint32_t bitmap_off; 
+
+       /* Is this bit index not word aligned?  If so, immediately fail. */
+       if (bitmap_st % kBitsPerWord) {
+               if (ALLOC_DEBUG) {
+                       panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
+               }
+               return EINVAL;
        }
-       
-       //
-       //      Allocate any remaining blocks.
-       //
-       
-       if (numBlocks != 0) {
-               bitMask = ~(kAllBitsSetInWord >> numBlocks);    //      set first numBlocks bits
-               if (wordsLeft == 0) {
-                       //      Read in the next bitmap block
-                       startingBlock += bitsPerBlock;                          //      generate a block number in the next bitmap block
-                       
-                       buffer = NULL;
-                       err = ReleaseBitmapBlock(vcb, blockRef, true);
-                       if (err != noErr) goto Exit;
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-                       if (err != noErr) goto Exit;
+       /* bitmap_off is in bytes, not allocation blocks/bits */
+       bitmap_off = bitmap_st / kBitsPerByte;
 
-                       // XXXdbg
-                       if (hfsmp->jnl) {
-                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+       if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
+               if (ALLOC_DEBUG) {
+                       panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
+               }
+               return EINVAL;
+       }
+
+       /* 
+        * Also invalid if it's not at least aligned to HFS bitmap logical
+        * block boundaries.  We don't have to emit an iosize that's an 
+        * exact multiple of the VBMIOSize, but it must start on such 
+        * a boundary.
+        *
+        * The vcbVBMIOSize may be SMALLER than the allocation block size
+        * on a FS with giant allocation blocks, but it will never be
+        * greater than it, so it should be safe to start I/O
+        * aligned on a VBMIOsize boundary. 
+        */
+       if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
+               if (ALLOC_DEBUG) {
+                       panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
+               }
+               return EINVAL;
+       }
+
+       /* 
+        * Generate the total bitmap file length in bytes, then round up
+        * that value to the end of the last allocation block, if needed (It 
+        * will probably be needed).  We won't scan past the last actual 
+        * allocation block.  
+        *
+        * Unless we're completing the bitmap scan (or bitmap < 1MB), we
+        * have to complete the I/O on VBMIOSize boundaries, but we can only read
+        * up until the end of the bitmap file.
+        */
+       bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
+
+       remaining_bitmap = bitmap_len - bitmap_off;
+
+       /* 
+        * io size is the MIN of the maximum I/O we can generate or the
+        * remaining amount of bitmap.
+        */
+       target_iosize = MIN((MAXBSIZE), remaining_bitmap);
+       *iosize = target_iosize;
+
+       return 0;
+}
+
+
+
+
+/*
+ * This function is basically the same as hfs_isallocated, except it's designed for 
+ * use with the red-black tree validation code.  It assumes we're only checking whether
+ * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
+ * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
+ *
+ * This should not be called in general purpose scanning code.
+ */
+int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
+
+       u_int32_t  *currentWord;   // Pointer to current word within bitmap block
+       u_int32_t  bitMask;        // Word with given bits already set (ready to test)
+       u_int32_t  firstBit;       // Bit index within word of first bit to allocate
+       u_int32_t  numBits;        // Number of bits in word to allocate
+       u_int32_t  bitsPerBlock;
+       uintptr_t  blockRef = 0;
+       u_int32_t  wordsPerBlock;
+       u_int32_t  numBlocks = 1;
+       u_int32_t  *buffer = NULL;
+
+       int  inuse = 0;
+       int error;
+
+
+       if (bp_buf) {
+               /* just use passed-in buffer if avail. */
+               buffer = bp_buf;
+       }
+       else {
+               /*
+                * Pre-read the bitmap block containing the first word of allocation
+                */
+               error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
+                                                               HFS_ALLOC_IGNORE_TENTATIVE);
+               if (error)
+                       return (error);
+       }
+
+       /*
+        * Initialize currentWord, and wordsLeft.
+        */
+       u_int32_t wordIndexInBlock;
+
+       bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+       wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+       wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+       currentWord = buffer + wordIndexInBlock;
+
+       /*
+        * First test any non word aligned bits.
+        */
+       firstBit = startingBlock % kBitsPerWord;
+       bitMask = kAllBitsSetInWord >> firstBit;
+       numBits = kBitsPerWord - firstBit;
+       if (numBits > numBlocks) {
+               numBits = numBlocks;
+               bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+       }
+       if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+               inuse = 1;
+               goto Exit;
+       }
+       numBlocks -= numBits;
+       ++currentWord;
+
+Exit:
+       if(bp_buf == NULL) {
+               if (buffer) {
+                       (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+               }
+       }
+       return (inuse);
+
+
+
+}
+
+/*
+ * This function resets all of the data structures relevant to the
+ * free extent cache stored in the hfsmount struct.  
+ * 
+ * If we are using the red-black tree code then we need to account for the fact that 
+ * we may encounter situations where we need to jettison the tree.  If that is the 
+ * case, then we fail-over to the bitmap scanning logic, but we need to ensure that 
+ * the free ext cache is zeroed before we start using it.  
+ *
+ * We also reset and disable the cache when allocLimit is updated... which 
+ * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()). 
+ * It is independent of the type of allocator being used currently.
+ */
+void ResetVCBFreeExtCache(struct hfsmount *hfsmp) 
+{
+       int bytes;
+       void *freeExt;
+
+       if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+       lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+       /* reset Free Extent Count */
+       hfsmp->vcbFreeExtCnt = 0;
+
+       /* reset the actual array */
+       bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
+       freeExt = (void*)(hfsmp->vcbFreeExt);
+
+       bzero (freeExt, bytes);
+
+       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+       if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+       return;
+}
+
+/*
+ * This function is used to inform the allocator if we have to effectively shrink
+ * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs. 
+ *
+ * The bitmap lock must be held when calling this function.  This function also modifies the
+ * allocLimit field in the hfs mount point structure in the general case. 
+ * 
+ * In the shrinking case, we'll have to remove all free extents from the red-black
+ * tree past the specified offset new_end_block.  In the growth case, we'll have to force
+ * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
+ * 
+ * new_end_block represents the total number of blocks available for allocation in the resized
+ * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
+ * will be out of the (0, n-1) range that are indexable in the bitmap.
+ *
+ * Returns     0 on success
+ *                     errno on failure
+ */
+__private_extern__
+u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
+
+       /* 
+        * Update allocLimit to the argument specified
+        */
+       hfsmp->allocLimit = new_end_block;
+
+       /* Invalidate the free extent cache completely so that 
+        * it does not have any extents beyond end of current 
+        * volume.
+        */
+       ResetVCBFreeExtCache(hfsmp);
+
+       /* Force a rebuild of the summary table. */
+       (void) hfs_rebuild_summary (hfsmp);
+
+       // Delete any tentative ranges that are in the area we're shrinking
+       struct rl_entry *range, *next_range;
+       TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
+                                          rl_link, next_range) {
+               if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP)
+                       hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
+       }
+
+       return 0;
+}
+
+/*
+ * Remove an extent from the list of free extents.
+ *
+ * This is a low-level routine.         It does not handle overlaps or splitting;
+ * that is the responsibility of the caller.  The input extent must exactly
+ * match an extent already in the list; it will be removed, and any following
+ * extents in the list will be shifted up.
+ *
+ * Inputs:
+ *     startBlock - Start of extent to remove
+ *     blockCount - Number of blocks in extent to remove
+ *
+ * Result:
+ *     The index of the extent that was removed.
+ */
+static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
+{
+       if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
+               if (ALLOC_DEBUG)
+                       panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
+               else
+                       printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
+               return;
+       }
+       int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
+       if (shift_count > 0) {
+               memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
+       }
+       hfsmp->vcbFreeExtCnt--;
+}
+
+
+/*
+ * Add an extent to the list of free extents.
+ *
+ * This is a low-level routine.         It does not handle overlaps or coalescing;
+ * that is the responsibility of the caller.  This routine *does* make
+ * sure that the extent it is adding is inserted in the correct location.
+ * If the list is full, this routine will handle either removing the last
+ * extent in the list to make room for the new extent, or ignoring the
+ * new extent if it is "worse" than the last extent in the list.
+ *
+ * Inputs:
+ *     startBlock - Start of extent to add
+ *     blockCount - Number of blocks in extent to add
+ *
+ * Result:
+ *     The index where the extent that was inserted, or kMaxFreeExtents
+ *     if the extent was not inserted (the list was full, and the extent
+ *     being added was "worse" than everything in the list).
+ */
+static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+       uint32_t i;
+
+       /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
+       if (ALLOC_DEBUG) {
+               uint32_t endBlock = startBlock + blockCount;
+               for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+                       if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
+                                       startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
+                               continue;
                        }
-                       
-                       //      Readjust currentWord and wordsLeft
-                       currentWord = buffer;
-                       wordsLeft = wordsPerBlock;
+                       panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
+                                       hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
                }
-#if DEBUG_BUILD
-               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-                       panic("BlockMarkAllocated: blocks already allocated!");
+       }        
+
+       /* Figure out what index the new extent should be inserted at. */
+       for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+               if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                       /* The list is sorted by increasing offset. */
+                       if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
+                               break;
+                       }
+               } else {
+                       /* The list is sorted by decreasing size. */
+                       if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
+                               break;
+                       }
+               }
+       }
+
+       /* When we get here, i is the index where the extent should be inserted. */
+       if (i == kMaxFreeExtents) {
+               /*
+                * The new extent is worse than anything already in the list,
+                * and the list is full, so just ignore the extent to be added.
+                */
+               return i;
+       }
+
+       /*
+        * Grow the list (if possible) to make room for an insert.
+        */
+       if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
+               hfsmp->vcbFreeExtCnt++;
+
+       /*
+        * If we'll be keeping any extents after the insert position, then shift them.
+        */
+       int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
+       if (shift_count > 0) {
+               memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
+       }
+
+       /* Finally, store the new extent at its correct position. */
+       hfsmp->vcbFreeExt[i].startBlock = startBlock;
+       hfsmp->vcbFreeExt[i].blockCount = blockCount;
+       return i;
+}
+
+
+/*
+ * Remove an entry from free extent cache after it has been allocated.
+ *
+ * This is a high-level routine.  It handles removing a portion of a
+ * cached extent, potentially splitting it into two (if the cache was
+ * already full, throwing away the extent that would sort last).  It
+ * also handles removing an extent that overlaps multiple extents in
+ * the cache.
+ *
+ * Inputs: 
+ *     hfsmp           - mount point structure 
+ *     startBlock      - starting block of the extent to be removed. 
+ *     blockCount      - number of blocks of the extent to be removed.
+ */
+static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+       u_int32_t i, insertedIndex;
+       u_int32_t currentStart, currentEnd, endBlock;
+       int extentsRemoved = 0;
+
+       if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
+
+       endBlock = startBlock + blockCount;
+
+       lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+       /*
+        * Iterate over all of the extents in the free extent cache, removing or
+        * updating any entries that overlap with the input extent.
+        */
+       for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
+               currentStart = hfsmp->vcbFreeExt[i].startBlock;
+               currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
+
+               /*
+                * If the current extent is entirely before or entirely after the
+                * the extent to be removed, then we keep it as-is.
+                */
+               if (currentEnd <= startBlock || currentStart >= endBlock) {
+                       continue;
+               }
+
+               /*
+                * If the extent being removed entirely contains the current extent,
+                * then remove the current extent.
+                */
+               if (startBlock <= currentStart && endBlock >= currentEnd) {
+                       remove_free_extent_list(hfsmp, i);
+
+                       /*
+                        * We just removed the extent at index i.  The extent at
+                        * index i+1 just got shifted to index i.  So decrement i
+                        * to undo the loop's "++i", and the next iteration will
+                        * examine index i again, which contains the next extent
+                        * in the list.
+                        */
+                       --i;
+                       ++extentsRemoved;
+                       continue;
+               }
+
+               /*
+                * If the extent being removed is strictly "in the middle" of the
+                * current extent, then we need to split the current extent into
+                * two discontiguous extents (the "head" and "tail").  The good
+                * news is that we don't need to examine any other extents in
+                * the list.
+                */
+               if (startBlock > currentStart && endBlock < currentEnd) {
+                       remove_free_extent_list(hfsmp, i);
+                       add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
+                       add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
+                       break;
+               }
+
+               /*
+                * The only remaining possibility is that the extent to be removed
+                * overlaps the start or end (but not both!) of the current extent.
+                * So we need to replace the current extent with a shorter one.
+                *
+                * The only tricky part is that the updated extent might be at a
+                * different index than the original extent.  If the updated extent
+                * was inserted after the current extent, then we need to re-examine
+                * the entry at index i, since it now contains the extent that was
+                * previously at index i+1.      If the updated extent was inserted
+                * before or at the same index as the removed extent, then the
+                * following extents haven't changed position.
+                */
+               remove_free_extent_list(hfsmp, i);
+               if (startBlock > currentStart) {
+                       /* Remove the tail of the current extent. */
+                       insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
+               } else {
+                       /* Remove the head of the current extent. */
+                       insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
+               }
+               if (insertedIndex > i) {
+                       --i;    /* Undo the "++i" in the loop, so we examine the entry at index i again. */
+               }
+       }
+
+       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+       sanity_check_free_ext(hfsmp, 0);
+
+       if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
+
+       return;
+}
+
+
+/*
+ * Add an entry to free extent cache after it has been deallocated.     
+ *
+ * This is a high-level routine.  It will merge overlapping or contiguous
+ * extents into a single, larger extent.
+ *
+ * If the extent provided has blocks beyond current allocLimit, it is
+ * clipped to allocLimit (so that we won't accidentally find and allocate
+ * space beyond allocLimit).
+ *
+ * Inputs: 
+ *     hfsmp           - mount point structure 
+ *     startBlock      - starting block of the extent to be removed. 
+ *     blockCount      - number of blocks of the extent to be removed.
+ *
+ * Returns:
+ *     true            - if the extent was added successfully to the list
+ *     false           - if the extent was not added to the list, maybe because 
+ *                       the extent was beyond allocLimit, or is not best 
+ *                       candidate to be put in the cache.
+ */
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+       Boolean retval = false;
+       uint32_t endBlock;
+       uint32_t currentEnd;
+       uint32_t i; 
+
+       if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
+
+#if DEBUG
+       for (i = 0; i < 2; ++i) {
+               struct rl_entry *range;
+               TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
+                       assert(rl_overlap(range, startBlock,
+                                                         startBlock + blockCount - 1) == RL_NOOVERLAP);
                }
+       }
 #endif
-               *currentWord |= SWAP_BE32 (bitMask);                    //      set the bits in the bitmap
 
-               //      No need to update currentWord or wordsLeft
+       /* No need to add extent that is beyond current allocLimit */
+       if (startBlock >= hfsmp->allocLimit) {
+               goto out_not_locked;
+       }
+
+       /* If end of the free extent is beyond current allocLimit, clip the extent */
+       if ((startBlock + blockCount) > hfsmp->allocLimit) {
+               blockCount = hfsmp->allocLimit - startBlock;
+       }
+
+       lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+       /*
+        * Make a pass through the free extent cache, looking for known extents that
+        * overlap or are contiguous with the extent to be added.  We'll remove those
+        * extents from the cache, and incorporate them into the new extent to be added.
+        */
+       endBlock = startBlock + blockCount;
+       for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
+               currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
+               if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
+                       /* Extent i does not overlap and is not contiguous, so keep it. */
+                       continue;
+               } else {
+                       /* We need to remove extent i and combine it with the input extent. */
+                       if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
+                               startBlock = hfsmp->vcbFreeExt[i].startBlock;
+                       if (currentEnd > endBlock)
+                               endBlock = currentEnd;
+
+                       remove_free_extent_list(hfsmp, i);
+                       /*
+                        * We just removed the extent at index i.  The extent at
+                        * index i+1 just got shifted to index i.  So decrement i
+                        * to undo the loop's "++i", and the next iteration will
+                        * examine index i again, which contains the next extent
+                        * in the list.
+                        */
+                       --i;
+               }
+       }
+       add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
+
+       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+out_not_locked:
+       sanity_check_free_ext(hfsmp, 0);
+
+       if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+               KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
+
+       return retval;
+}
+
+/* Debug function to check if the free extent cache is good or not */
+static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
+{
+       u_int32_t i, j;
+
+       /* Do not do anything if debug is not on */
+       if (ALLOC_DEBUG == 0) {
+               return;
        }
 
-Exit:
+       lck_spin_lock(&hfsmp->vcbFreeExtLock);
 
-       if (buffer)
-               (void)ReleaseBitmapBlock(vcb, blockRef, true);
+       if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
+               panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
 
-       return err;
-}
+       /* 
+        * Iterate the Free extent cache and ensure no entries are bogus or refer to
+        * allocated blocks.
+        */
+       for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
+               u_int32_t start, nblocks;
+
+               start   = hfsmp->vcbFreeExt[i].startBlock;
+               nblocks = hfsmp->vcbFreeExt[i].blockCount;
+
+               /* Check if any of the blocks in free extent cache are allocated.  
+                * This should not be enabled always because it might take 
+                * very long for large extents that get added to the list.
+                *
+                * We have to drop vcbFreeExtLock while we call hfs_isallocated
+                * because it is going to do I/O.  Note that the free extent
+                * cache could change.  That's a risk we take when using this
+                * debugging code.  (Another alternative would be to try to
+                * detect when the free extent cache changed, and perhaps
+                * restart if the list changed while we dropped the lock.)
+                */
+               if (check_allocated) {
+                       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+                       if (hfs_isallocated(hfsmp, start, nblocks)) {
+                               panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
+                                               hfsmp, i, start, nblocks);
+                       }
+                       lck_spin_lock(&hfsmp->vcbFreeExtLock);
+               }
 
+               /* Check if any part of the extent is beyond allocLimit */
+               if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
+                       panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
+                                       hfsmp, i, start, nblocks, hfsmp->allocLimit);
+               }
 
-/*
-_______________________________________________________________________
+               /* Check if there are any duplicate start blocks */
+               for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
+                       if (start == hfsmp->vcbFreeExt[j].startBlock) {
+                               panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n", 
+                                               hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, 
+                                               hfsmp->vcbFreeExt[j].blockCount);
+                       }
+               }
 
-Routine:       BlockMarkFree
+               /* Check if the entries are out of order */
+               if ((i+1) != hfsmp->vcbFreeExtCnt) {
+                       if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+                               /* sparse devices are sorted by starting block number (ascending) */
+                               if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
+                                       panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n", 
+                                                       hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, 
+                                                       hfsmp->vcbFreeExt[i+1].blockCount);
+                               }
+                       } else {
+                               /* normally sorted by block count (descending) */
+                               if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
+                                       panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n", 
+                                                       hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, 
+                                                       hfsmp->vcbFreeExt[i+1].blockCount);
+                               }
+                       }
+               }
+       }
+       lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+}
 
-Function:      Mark a contiguous group of blocks as free (clear in the
-                       bitmap).  It assumes those bits are currently marked
-                       allocated (set in the bitmap).
+#define BIT_RIGHT_MASK(bit)    (0xffffffffffffffffull >> (bit))
+#define kHighBitInDoubleWordMask 0x8000000000000000ull
 
-Inputs:
-       vcb                             Pointer to volume where space is to be freed
-       startingBlock   First block number to mark as freed
-       numBlocks               Number of blocks to mark as freed
-_______________________________________________________________________
-*/
-__private_extern__
-OSErr BlockMarkFree(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       register UInt32 numBlocks)
+static int clzll(uint64_t x)
 {
-       OSErr                   err;
-       register UInt32 *currentWord;   //      Pointer to current word within bitmap block
-       register UInt32 wordsLeft;              //      Number of words left in this bitmap block
-       register UInt32 bitMask;                //      Word with given bits already set (ready to OR in)
-       UInt32                  firstBit;               //      Bit index within word of first bit to allocate
-       UInt32                  numBits;                //      Number of bits in word to allocate
-       UInt32                  *buffer = NULL;
-       UInt32  blockRef;
-       UInt32  bitsPerBlock;
-       UInt32  wordsPerBlock;
-    // XXXdbg
-       struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       if (x == 0)
+               return 64;
+       else
+               return __builtin_clzll(x);
+}
 
-       if (startingBlock + numBlocks > vcb->totalBlocks) {
-           panic("hfs: block mark free: trying to free non-existent blocks (%d %d %d)\n",
-                 startingBlock, numBlocks, vcb->totalBlocks);
-       }
+#if !HFS_ALLOC_TEST
 
+static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
+{
+       uint32_t        start_bit;
+       uint32_t        iosize = 0;
+       uint32_t        byte_offset;
+       uint32_t        last_bitmap_block;
+       int                     error;
+       struct hfsmount *hfsmp = bitmap_ctx->hfsmp;
+#if !HFS_ALLOC_TEST
+       uint64_t        lock_elapsed;
+#endif
 
-       //
-       //      Pre-read the bitmap block containing the first word of allocation
-       //
 
-       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-       if (err != noErr) goto Exit;
-       // XXXdbg
-       if (hfsmp->jnl) {
-               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+       if (bitmap_ctx->bp)
+               ReleaseScanBitmapRange(bitmap_ctx->bp);
+       
+       if (msleep(NULL, NULL, PINOD | PCATCH,
+                          "hfs_fsinfo", NULL) == EINTR) {
+               return EINTR;
        }
 
-       //
-       //      Initialize currentWord, and wordsLeft.
-       //
-       {
-               UInt32 wordIndexInBlock;
+#if !HFS_ALLOC_TEST
+       /*
+        * Let someone else use the allocation map after we've processed over HFS_FSINFO_MAX_LOCKHELD_TIME .
+        * lock_start is initialized in hfs_find_free_extents().
+        */
+       absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed);
+
+       if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) {
+
+               hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags);
                
-               bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
-               wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+               /* add tsleep here to force context switch and fairness */
+               tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1);
 
-               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-               currentWord = buffer + wordIndexInBlock;
-               wordsLeft = wordsPerBlock - wordIndexInBlock;
-       }
-       
-       //
-       //      If the first block to free doesn't start on a word
-       //      boundary in the bitmap, then treat that first word
-       //      specially.
-       //
+               hfs_journal_lock(hfsmp);
 
-       firstBit = startingBlock % kBitsPerWord;
-       if (firstBit != 0) {
-               bitMask = kAllBitsSetInWord >> firstBit;        //      turn off all bits before firstBit
-               numBits = kBitsPerWord - firstBit;                      //      number of remaining bits in this word
-               if (numBits > numBlocks) {
-                       numBits = numBlocks;                                    //      entire allocation is inside this one word
-                       bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));        //      turn off bits after last
-               }
-               if ((*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
-                       goto Corruption;
+               /* Flush the journal and wait for all I/Os to finish up */
+               error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
+               if (error) {
+                       hfs_journal_unlock(hfsmp);
+                       return error;
                }
-               *currentWord &= SWAP_BE32 (~bitMask);           //      clear the bits in the bitmap
-               numBlocks -= numBits;                                           //      adjust number of blocks left to free
 
-               ++currentWord;                                                          //      move to next word
-               --wordsLeft;                                                            //      one less word left in this block
-       }
+               /*
+                * Take bitmap lock to ensure it is not being modified while journal is still held.
+                * Since we are reading larger than normal blocks from the bitmap, which
+                * might confuse other parts of the bitmap code using normal blocks, we
+                * take exclusive lock here.
+                */
+               bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 
-       //
-       //      Free whole words (32 blocks) at a time.
-       //
+               bitmap_ctx->lock_start = mach_absolute_time();
 
-       while (numBlocks >= kBitsPerWord) {
-               if (wordsLeft == 0) {
-                       //      Read in the next bitmap block
-                       startingBlock += bitsPerBlock;                  //      generate a block number in the next bitmap block
-                       
-                       buffer = NULL;
-                       err = ReleaseBitmapBlock(vcb, blockRef, true);
-                       if (err != noErr) goto Exit;
+               /* Release the journal lock */
+               hfs_journal_unlock(hfsmp);
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-                       if (err != noErr) goto Exit;
+               /*
+                * Bitmap is read in large block size (up to 1MB),
+                * unlike the runtime which reads the bitmap in the
+                * 4K block size.  If the bitmap is read by both ways
+                * at the same time, it can result in multiple buf_t with
+                * different sizes and potentially case data corruption.
+                * To avoid this, we invalidate all the existing buffers
+                * associated with the bitmap vnode.
+                */
+               error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+               if (error) {
+                       /* hfs_systemfile_unlock will be called in the caller */
+                       return error;
+               }
+       }
+#endif
 
-                       // XXXdbg
-                       if (hfsmp->jnl) {
-                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
-                       }
+       start_bit = bitmap_ctx->run_offset;
 
-                       //      Readjust currentWord and wordsLeft
-                       currentWord = buffer;
-                       wordsLeft = wordsPerBlock;
-               }
-               if (*currentWord != SWAP_BE32 (kAllBitsSetInWord)) {
-                       goto Corruption;
-               }
-               *currentWord = 0;                                                       //      clear the entire word
-               numBlocks -= kBitsPerWord;
-               
-               ++currentWord;                                                          //      move to next word
-               --wordsLeft;                                                            //      one less word left in this block
+       if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) {
+               bitmap_ctx->chunk_end = 0;
+               bitmap_ctx->bp = NULL;
+               bitmap_ctx->bitmap = NULL;
+               return 0;
        }
-       
-       //
-       //      Free any remaining blocks.
-       //
-       
-       if (numBlocks != 0) {
-               bitMask = ~(kAllBitsSetInWord >> numBlocks);    //      set first numBlocks bits
-               if (wordsLeft == 0) {
-                       //      Read in the next bitmap block
-                       startingBlock += bitsPerBlock;                          //      generate a block number in the next bitmap block
-                       
-                       buffer = NULL;
-                       err = ReleaseBitmapBlock(vcb, blockRef, true);
-                       if (err != noErr) goto Exit;
 
-                       err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-                       if (err != noErr) goto Exit;
+       assert(start_bit % 8 == 0);
 
-                       // XXXdbg
-                       if (hfsmp->jnl) {
-                               journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
-                       }
-                       
-                       //      Readjust currentWord and wordsLeft
-                       currentWord = buffer;
-                       wordsLeft = wordsPerBlock;
-               }
-               if ((*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
-                       goto Corruption;
-               }
-               *currentWord &= SWAP_BE32 (~bitMask);                   //      clear the bits in the bitmap
+       /*
+        * Compute how much I/O we should generate here.
+        * hfs_scan_range_size will validate that the start bit
+        * converted into a byte offset into the bitmap file,
+        * is aligned on a VBMIOSize boundary.
+        */
+       error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize);
+       if (error)
+               return error;
 
-               //      No need to update currentWord or wordsLeft
-       }
+       assert(iosize != 0);
 
-Exit:
+       /* hfs_scan_range_size should have verified startbit.  Convert it to bytes */
+       byte_offset = start_bit / kBitsPerByte;
 
-       if (buffer)
-               (void)ReleaseBitmapBlock(vcb, blockRef, true);
+       /*
+        * When the journal replays blocks, it does so by writing directly to the disk
+        * device (bypassing any filesystem vnodes and such).  When it finishes its I/Os
+        * it also immediately re-reads and invalidates the range covered by the bp so
+        * it does not leave anything lingering in the cache (for iosize reasons).
+        *
+        * As such, it is safe to do large I/Os here with ReadBitmapRange.
+        *
+        * NOTE: It is not recommended, but it is possible to call the function below
+        * on sections of the bitmap that may be in core already as long as the pages are not
+        * dirty.  In that case, we'd notice that something starting at that
+        * logical block of the bitmap exists in the metadata cache, and we'd check
+        * if the iosize requested is the same as what was already allocated for it.
+        * Odds are pretty good we're going to request something larger.  In that case,
+        * we just free the existing memory associated with the buf and reallocate a
+        * larger range. This function should immediately invalidate it as soon as we're
+        * done scanning, so this shouldn't cause any coherency issues.
+        */
+       error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp);
+       if (error)
+               return error;
 
-       return err;
+       /*
+        * At this point, we have a giant wired buffer that represents some portion of
+        * the bitmap file that we want to analyze.   We may not have gotten all 'iosize'
+        * bytes though, so clip our ending bit to what we actually read in.
+        */
+       last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte;
 
-Corruption:
-#if DEBUG_BUILD
-       panic("BlockMarkFree: blocks not allocated!");
-#else
-       printf("hfs: WARNING - blocks on volume %s not allocated!\n", vcb->vcbVN);
-       vcb->vcbAtrb |= kHFSVolumeInconsistentMask;
-       MarkVCBDirty(vcb);
-       err = EIO;
-       goto Exit;
-#endif
+       /* Cap the last block to the total number of blocks if required */
+       if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks)
+               last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks;
+
+       bitmap_ctx->chunk_current = 0;  // new chunk of bitmap
+       bitmap_ctx->chunk_end = last_bitmap_block - start_bit;
+
+       return 0;
 }
 
+#endif // !HFS_ALLOC_TEST
 
-/*
-_______________________________________________________________________
+// Returns number of contiguous bits set at start
+static int bit_count_set(void *bitmap, int start, int end)
+{
+       if (start == end)
+               return 0;
 
-Routine:       BlockFindContiguous
+       assert(end > start);
 
-Function:      Find a contiguous range of blocks that are free (bits
-                       clear in the bitmap).  If a contiguous range of the
-                       minimum size can't be found, an error will be returned.
+       const int start_bit = start & 63;
+       const int end_bit   = end & 63;
 
-Inputs:
-       vcb                             Pointer to volume where space is to be allocated
-       startingBlock   Preferred first block of range
-       endingBlock             Last possible block in range + 1
-       minBlocks               Minimum number of blocks needed.  Must be > 0.
-       maxBlocks               Maximum (ideal) number of blocks desired
-       useMetaZone     OK to dip into metadata allocation zone
+       uint64_t *p = (uint64_t *)bitmap + start / 64;
+       uint64_t x = ~OSSwapBigToHostInt64(*p);
 
-Outputs:
-       actualStartBlock        First block of range found, or 0 if error
-       actualNumBlocks         Number of blocks found, or 0 if error
+       if ((start & ~63) == (end & ~63)) {
+               // Start and end in same 64 bits
+               x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
+               return clzll(x) - start_bit;
+       }
 
-Returns:
-       noErr                   Found at least minBlocks contiguous
-       dskFulErr               No contiguous space found, or all less than minBlocks
-_______________________________________________________________________
-*/
+       // Deal with initial unaligned bit
+       x &= BIT_RIGHT_MASK(start_bit);
 
-static OSErr BlockFindContiguous(
-       ExtendedVCB             *vcb,
-       UInt32                  startingBlock,
-       UInt32                  endingBlock,
-       UInt32                  minBlocks,
-       UInt32                  maxBlocks,
-       Boolean                 useMetaZone,
-       UInt32                  *actualStartBlock,
-       UInt32                  *actualNumBlocks)
-{
-       OSErr                   err;
-       register UInt32 currentBlock;           //      Block we're currently looking at.
-       UInt32                  firstBlock;                     //      First free block in current extent.
-       UInt32                  stopBlock;                      //      If we get to this block, stop searching for first free block.
-       UInt32                  foundBlocks;            //      Number of contiguous free blocks in current extent.
-       UInt32                  *buffer = NULL;
-       register UInt32 *currentWord;
-       register UInt32 bitMask;
-       register UInt32 wordsLeft;
-       register UInt32 tempWord;
-       UInt32  blockRef;
-       UInt32  wordsPerBlock;
-
-       if (!useMetaZone) {
-               struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       if (x)
+               return clzll(x) - start_bit;
 
-               
-               if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
-                   (startingBlock <= hfsmp->hfs_metazone_end))
-                       startingBlock = hfsmp->hfs_metazone_end + 1;
+       // Go fast
+       ++p;
+       int count = 64 - start_bit;
+       int nquads = (end - end_bit - start - 1) / 64;
+
+       while (nquads--) {
+               if (*p != 0xffffffffffffffffull) {
+                       x = ~OSSwapBigToHostInt64(*p);
+                       return count + clzll(x);
+               }
+               ++p;
+               count += 64;
        }
 
-       if ((endingBlock - startingBlock) < minBlocks)
-       {
-               //      The set of blocks we're checking is smaller than the minimum number
-               //      of blocks, so we couldn't possibly find a good range.
-               goto DiskFull;
+       if (end_bit) {
+               x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
+               count += clzll(x);
        }
 
-       stopBlock = endingBlock - minBlocks + 1;
-       currentBlock = startingBlock;
-       firstBlock = 0;
+       return count;
+}
 
-       /*
-        * Skip over metadata blocks.
-        */
-       if (!useMetaZone)
-               currentBlock = NextBitmapBlock(vcb, currentBlock);
+/* Returns the number of a run of cleared bits:
+ *  bitmap is a single chunk of memory being examined
+ *  start: the start bit relative to the current buffer to be examined; start is inclusive.
+ *  end: the end bit relative to the current buffer to be examined; end is not inclusive.
+ */
+static int bit_count_clr(void *bitmap, int start, int end)
+{
+       if (start == end)
+               return 0;
 
-       //
-       //      Pre-read the first bitmap block.
-       //
-       err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
-       if ( err != noErr ) goto ErrorExit;
+       assert(end > start);
 
-       //
-       //      Figure out where currentBlock is within the buffer.
-       //
-       wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
+       const int start_bit = start & 63;
+       const int end_bit   = end & 63;
 
-       wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);  // Current index into buffer
-       currentWord = buffer + wordsLeft;
-       wordsLeft = wordsPerBlock - wordsLeft;
-       
-       do
-       {
-               foundBlocks = 0;
-               
-               //============================================================
-               //      Look for a free block, skipping over allocated blocks.
-               //============================================================
+       uint64_t *p = (uint64_t *)bitmap + start / 64;
+       uint64_t x = OSSwapBigToHostInt64(*p);
 
-               //
-               //      Check an initial partial word (if any)
-               //
-               bitMask = currentBlock & kBitsWithinWordMask;
-               if (bitMask)
-               {                       
-                       tempWord = SWAP_BE32(*currentWord);                     //      Fetch the current word only once
-                       bitMask = kHighBitInWordMask >> bitMask;
-                       while (tempWord & bitMask)
-                       {
-                               bitMask >>= 1;
-                               ++currentBlock;
-                       }
+       if ((start & ~63) == (end & ~63)) {
+               // Start and end in same 64 bits
+               x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
 
-                       //      Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)? 
-                       if (bitMask)
-                               goto FoundUnused;
+               return clzll(x) - start_bit;
+       }
+
+       // Deal with initial unaligned bit
+       x &= BIT_RIGHT_MASK(start_bit);
+
+       if (x)
+               return clzll(x) - start_bit;
+
+       // Go fast
+       ++p;
+       int count = 64 - start_bit;
+       int nquads = (end - end_bit - start - 1) / 64;
+
+       while (nquads--) {
+               if (*p) {
+                       x = OSSwapBigToHostInt64(*p);
+                       return count + clzll(x);
+               }
+               ++p;
+               count += 64;
+       }
+
+       if (end_bit) {
+               x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
+
+               count += clzll(x);
+       }
+
+       return count;
+}
+
+#if !HFS_ALLOC_TEST
+static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set)
+{
+       uint32_t        end, start_summary_bit, end_summary_bit;
+       errno_t         error = 0;
+
+       if (count == 0)
+               goto out;
 
-                       //      Didn't find any unused bits, so we're done with this word.
-                       ++currentWord;
-                       --wordsLeft;
-               }
+       if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE))
+               return 0;
 
-               //
-               //      Check whole words
-               //
-               while (currentBlock < stopBlock)
-               {
-                       //      See if it's time to read another block.
-                       if (wordsLeft == 0)
-                       {
-                               buffer = NULL;
-                               err = ReleaseBitmapBlock(vcb, blockRef, false);
-                               if (err != noErr) goto ErrorExit;
+       if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) {
+               error = EINVAL;
+               goto out;
+       }
 
-                               /*
-                                * Skip over metadata blocks.
-                                */
-                               if (!useMetaZone) {
-                                       currentBlock = NextBitmapBlock(vcb, currentBlock);
-                                       if (currentBlock >= stopBlock) {
-                                               goto LoopExit;
-                                       }
-                               }
+       end = start + count - 1;
+       if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) {
+               error = EINVAL;
+               goto out;
+       }
 
-                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
-                               if ( err != noErr ) goto ErrorExit;
-                               
-                               currentWord = buffer;
-                               wordsLeft = wordsPerBlock;
-                       }
-                       
-                       //      See if any of the bits are clear
-                       if ((tempWord = SWAP_BE32(*currentWord)) + 1)   //      non-zero if any bits were clear
-                       {
-                               //      Figure out which bit is clear
-                               bitMask = kHighBitInWordMask;
-                               while (tempWord & bitMask)
-                               {
-                                       bitMask >>= 1;
-                                       ++currentBlock;
-                               }
-                               
-                               break;          //      Found the free bit; break out to FoundUnused.
-                       }
+       // if summary table bit has been updated with free block previously, leave it.
+       if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set)
+               start_summary_bit++;
 
-                       //      Keep looking at the next word
-                       currentBlock += kBitsPerWord;
-                       ++currentWord;
-                       --wordsLeft;
-               }
+       for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++)
+               hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set);
 
-FoundUnused:
-               //      Make sure the unused bit is early enough to use
-               if (currentBlock >= stopBlock)
-               {
-                       break;
-               }
+       if (!set)
+               bitmap_ctx->last_free_summary_bit = end_summary_bit;
 
-               //      Remember the start of the extent
-               firstBlock = currentBlock;
+out:
+       return error;
 
-               //============================================================
-               //      Count the number of contiguous free blocks.
-               //============================================================
+}
+#endif //!HFS_ALLOC_TEST
 
-               //
-               //      Check an initial partial word (if any)
-               //
-               bitMask = currentBlock & kBitsWithinWordMask;
-               if (bitMask)
-               {
-                       tempWord = SWAP_BE32(*currentWord);                     //      Fetch the current word only once
-                       bitMask = kHighBitInWordMask >> bitMask;
-                       while (bitMask && !(tempWord & bitMask))
-                       {
-                               bitMask >>= 1;
-                               ++currentBlock;
-                       }
+/*
+ * Read in chunks of the bitmap into memory, and find a run of cleared/set bits;
+ * the run can extend across chunk boundaries.
+ * bit_count_clr can be passed to get a run of cleared bits.
+ * bit_count_set can be passed to get a run of set bits.
+ */
+static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count)
+{
+       int count;
+       errno_t error = 0;
 
-                       //      Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)? 
-                       if (bitMask)
-                               goto FoundUsed;
+       *bit_count = 0;
 
-                       //      Didn't find any used bits, so we're done with this word.
-                       ++currentWord;
-                       --wordsLeft;
+       do {
+               if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) {
+                       if ((error = get_more_bits(bitmap_ctx)) != 0)
+                               goto out;
                }
-               
-               //
-               //      Check whole words
-               //
-               while (currentBlock < endingBlock)
-               {
-                       //      See if it's time to read another block.
-                       if (wordsLeft == 0)
-                       {
-                               buffer = NULL;
-                               err = ReleaseBitmapBlock(vcb, blockRef, false);
-                               if (err != noErr) goto ErrorExit;
 
-                               /*
-                                * Skip over metadata blocks.
-                                */
-                               if (!useMetaZone) {
-                                       UInt32 nextBlock;
+               if (bitmap_ctx->chunk_end == 0)
+                       break;
 
-                                       nextBlock = NextBitmapBlock(vcb, currentBlock);
-                                       if (nextBlock != currentBlock) {
-                                               goto LoopExit;  /* allocation gap, so stop */
-                                       }
-                               }
+               count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end);
 
-                               err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
-                               if ( err != noErr ) goto ErrorExit;
-                               
-                               currentWord = buffer;
-                               wordsLeft = wordsPerBlock;
-                       }
-                       
-                       //      See if any of the bits are set
-                       if ((tempWord = SWAP_BE32(*currentWord)) != 0)
-                       {
-                               //      Figure out which bit is set
-                               bitMask = kHighBitInWordMask;
-                               while (!(tempWord & bitMask))
-                               {
-                                       bitMask >>= 1;
-                                       ++currentBlock;
-                               }
-                               
-                               break;          //      Found the used bit; break out to FoundUsed.
-                       }
+               bitmap_ctx->run_offset += count;
+               bitmap_ctx->chunk_current += count;
+               *bit_count += count;
 
-                       //      Keep looking at the next word
-                       currentBlock += kBitsPerWord;
-                       ++currentWord;
-                       --wordsLeft;
-                       
-                       //      If we found at least maxBlocks, we can quit early.
-                       if ((currentBlock - firstBlock) >= maxBlocks)
-                               break;
-               }
+       } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count);
 
-FoundUsed:
-               //      Make sure we didn't run out of bitmap looking for a used block.
-               //      If so, pin to the end of the bitmap.
-               if (currentBlock > endingBlock)
-                       currentBlock = endingBlock;
+out:
+       return error;
 
-               //      Figure out how many contiguous free blocks there were.
-               //      Pin the answer to maxBlocks.
-               foundBlocks = currentBlock - firstBlock;
-               if (foundBlocks > maxBlocks)
-                       foundBlocks = maxBlocks;
-               if (foundBlocks >= minBlocks)
-                       break;          //      Found what we needed!
+}
 
-               //      This free chunk wasn't big enough.  Try inserting it into the free extent cache in case
-               //      the allocation wasn't forced contiguous.
-               tempWord = vcb->vcbFreeExtCnt;
-               if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < foundBlocks)
-                       --tempWord;
-               if (tempWord < kMaxFreeExtents)
-               {
-                       //      We're going to add this extent.  Bubble any smaller extents down in the list.
-                       while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < foundBlocks)
-                       {
-                               vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1];
-                               --tempWord;
-                       }
-                       vcb->vcbFreeExt[tempWord].startBlock = firstBlock;
-                       vcb->vcbFreeExt[tempWord].blockCount = foundBlocks;
-                       
-                       if (vcb->vcbFreeExtCnt < kMaxFreeExtents)
-                               ++vcb->vcbFreeExtCnt;
-               }
-       } while (currentBlock < stopBlock);
-LoopExit:
+// Returns count of number of bits clear
+static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count)
+{
+       return hfs_bit_count(bitmap_ctx, bit_count_clr, count);
+}
 
-       //      Return the outputs.
-       if (foundBlocks < minBlocks)
-       {
-DiskFull:
-               err = dskFulErr;
-ErrorExit:
-               *actualStartBlock = 0;
-               *actualNumBlocks = 0;
-       }
-       else
-       {
-               err = noErr;
-               *actualStartBlock = firstBlock;
-               *actualNumBlocks = foundBlocks;
-       }
-       
-       if (buffer)
-               (void) ReleaseBitmapBlock(vcb, blockRef, false);
+// Returns count of number of bits set
+static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count)
+{
+       return hfs_bit_count(bitmap_ctx, bit_count_set, count);
+}
 
-       return err;
+static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx)
+{
+       return bitmap_ctx->run_offset;
 }
 
 /*
- * Test to see if any blocks in a range are allocated.
- *
- * The journal or allocation file lock must be held.
+ * Perform a full scan of the bitmap file.
+ * Note: during the scan of bitmap file, it may drop and reacquire the
+ * bitmap lock to let someone else use the bitmap for fairness.
+ * Currently it is used by HFS_GET_FSINFO statistic gathing, which
+ * is run while other processes might perform HFS operations.
  */
-__private_extern__
-int 
-hfs_isallocated(struct hfsmount *hfsmp, u_long startingBlock, u_long numBlocks)
+
+errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
+                                                         void (*callback)(void *data, off_t free_extent_size), void *callback_arg)
 {
-       UInt32  *currentWord;   // Pointer to current word within bitmap block
-       UInt32  wordsLeft;      // Number of words left in this bitmap block
-       UInt32  bitMask;        // Word with given bits already set (ready to test)
-       UInt32  firstBit;       // Bit index within word of first bit to allocate
-       UInt32  numBits;        // Number of bits in word to allocate
-       UInt32  *buffer = NULL;
-       UInt32  blockRef;
-       UInt32  bitsPerBlock;
-       UInt32  wordsPerBlock;
-       int  inuse = 0;
-       int  error;
+       struct bitmap_context bitmap_ctx;
+       uint32_t count;
+       errno_t error = 0;
+
+       if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
+               error = hfs_init_summary(hfsmp);
+               if (error)
+                       return error;
+       }
 
-       /*
-        * Pre-read the bitmap block containing the first word of allocation
-        */
-       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
-       if (error)
-               return (error);
+       bzero(&bitmap_ctx, sizeof(struct bitmap_context));
 
        /*
-        * Initialize currentWord, and wordsLeft.
+        * The journal maintains list of recently deallocated blocks to
+        * issue DKIOCUNMAPs when the corresponding journal transaction is
+        * flushed to the disk.  To avoid any race conditions, we only
+        * want one active trim list.  Therefore we make sure that the
+        * journal trim list is sync'ed, empty, and not modifiable for
+        * the duration of our scan.
+        *
+        * Take the journal lock before flushing the journal to the disk.
+        * We will keep on holding the journal lock till we don't get the
+        * bitmap lock to make sure that no new journal transactions can
+        * start.  This will make sure that the journal trim list is not
+        * modified after the journal flush and before getting bitmap lock.
+        * We can release the journal lock after we acquire the bitmap
+        * lock as it will prevent any further block deallocations.
         */
-       {
-               UInt32 wordIndexInBlock;
-               
-               bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
-               wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+       hfs_journal_lock(hfsmp);
 
-               wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-               currentWord = buffer + wordIndexInBlock;
-               wordsLeft = wordsPerBlock - wordIndexInBlock;
+       /* Flush the journal and wait for all I/Os to finish up */
+       error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
+       if (error) {
+               hfs_journal_unlock(hfsmp);
+               return error;
        }
-       
+
        /*
-        * First test any non word aligned bits.
+        * Take bitmap lock to ensure it is not being modified.
+        * Since we are reading larger than normal blocks from the bitmap, which
+        * might confuse other parts of the bitmap code using normal blocks, we
+        * take exclusive lock here.
         */
-       firstBit = startingBlock % kBitsPerWord;
-       if (firstBit != 0) {
-               bitMask = kAllBitsSetInWord >> firstBit;
-               numBits = kBitsPerWord - firstBit;
-               if (numBits > numBlocks) {
-                       numBits = numBlocks;
-                       bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
-               }
-               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-                       inuse = 1;
-                       goto Exit;
-               }
-               numBlocks -= numBits;
-               ++currentWord;
-               --wordsLeft;
-       }
+       bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+
+#if !HFS_ALLOC_TEST
+       bitmap_ctx.lock_start = mach_absolute_time();
+#endif
+
+       /* Release the journal lock */
+       hfs_journal_unlock(hfsmp);
 
        /*
-        * Test whole words (32 blocks) at a time.
+        * Bitmap is read in large block size (up to 1MB),
+        * unlike the runtime which reads the bitmap in the
+        * 4K block size.  If the bitmap is read by both ways
+        * at the same time, it can result in multiple buf_t with
+        * different sizes and potentially case data corruption.
+        * To avoid this, we invalidate all the existing buffers
+        * associated with the bitmap vnode.
         */
-       while (numBlocks >= kBitsPerWord) {
-               if (wordsLeft == 0) {
-                       /* Read in the next bitmap block. */
-                       startingBlock += bitsPerBlock;
-                       
-                       buffer = NULL;
-                       error = ReleaseBitmapBlock(hfsmp, blockRef, false);
-                       if (error) goto Exit;
-
-                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
-                       if (error) goto Exit;
+       error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
+       if (error)
+               goto out;
 
-                       /* Readjust currentWord and wordsLeft. */
-                       currentWord = buffer;
-                       wordsLeft = wordsPerBlock;
-               }
-               if (*currentWord != 0) {
-                       inuse = 1;
-                       goto Exit;
-               }
-               numBlocks -= kBitsPerWord;
-               ++currentWord;
-               --wordsLeft;
-       }
-       
        /*
-        * Test any remaining blocks.
+        * Get the list of all free extent ranges.  hfs_alloc_scan_range()
+        * will call hfs_fsinfo_data_add() to account for all the free
+        * extent ranges found during scan.
         */
-       if (numBlocks != 0) {
-               bitMask = ~(kAllBitsSetInWord >> numBlocks);
-               if (wordsLeft == 0) {
-                       /* Read in the next bitmap block */
-                       startingBlock += bitsPerBlock;
-                       
-                       buffer = NULL;
-                       error = ReleaseBitmapBlock(hfsmp, blockRef, false);
-                       if (error) goto Exit;
+       bitmap_ctx.hfsmp = hfsmp;
+       bitmap_ctx.run_offset = 0;
 
-                       error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
-                       if (error) goto Exit;
+       while (bitmap_ctx.run_offset < hfsmp->totalBlocks) {
 
-                       currentWord = buffer;
-                       wordsLeft = wordsPerBlock;
-               }
-               if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-                       inuse = 1;
-                       goto Exit;
-               }
+               uint32_t start = hfs_bit_offset(&bitmap_ctx);
+
+               if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0)
+                       goto out;
+
+               if (count)
+                       callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize));
+
+               if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0)
+                       goto out;
+
+               start = hfs_bit_offset(&bitmap_ctx);
+
+               if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0)
+                       goto out;
+
+               if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0)
+                       goto out;
        }
-Exit:
-       if (buffer) {
-               (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+
+out:
+       if (bitmap_ctx.lockflags) {
+               hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags);
        }
-       return (inuse);
-}
 
+       return error;
+}