/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
	File:		VolumeAllocation.c

	Contains:	Routines for accessing and modifying the volume bitmap.

	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.
*/
/*
Public routines:
	BlockAllocate
			Allocate space on a volume.  Can allocate space contiguously.
			If not contiguous, then allocation may be less than what was
			asked for.  Returns the starting block number, and number of
			blocks.  (Will only do a single extent???)
	BlockDeallocate
			Deallocate a contiguous run of allocation blocks.

	BlockMarkAllocated
			Exported wrapper to mark blocks as in-use.  This will correctly determine
			whether or not the red-black tree is enabled and call the appropriate function.
	BlockMarkFree
			Exported wrapper to mark blocks as freed.  This will correctly determine whether or
			not the red-black tree is enabled and call the appropriate function if applicable.

	ResetVCBFreeExtCache
			Since the red-black tree obviates the need to maintain the free extent cache, we do
			not update it if the tree is also live.  As a result, if we ever need to destroy the trees
			we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
			bitmap scanning allocator.
			We also reset and disable the free extent cache when volume resizing is
			in progress.

	UpdateAllocLimit
			Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
			allocations from occupying space in the region we are modifying during a filesystem resize.
			At other times, it should be consistent with the total number of allocation blocks in the
			filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
			know about.  If growing, scan the new range of bitmap, and if shrinking, reduce the
			number of items in the tree that we can allocate from.

	UnmapBlocks
			Issues DKIOCUNMAPs to the device as it fills the internal volume buffer when iterating
			the volume bitmap.

Internal routines:
	Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.  This
	is to cut down on space for functions that could not possibly be used if they are not planning to
	use the red-black tree code.

	BlockMarkFreeRBTree
			Make an internal call to BlockMarkFree and then update
			and/or create Red-Black Tree allocation tree nodes to correspond
			to the free space being generated.
	BlockMarkFreeInternal
			Mark a contiguous range of blocks as free.  The corresponding
			bits in the volume bitmap will be cleared.  This will actually do the work
			of modifying the bitmap for us.

	BlockMarkAllocatedRBTree
			Make an internal call to BlockAllocateMarked, which will update the
			bitmap on-disk when we allocate blocks.  If that is successful, then
			we'll remove the appropriate entries from the red-black tree.
	BlockMarkAllocatedInternal
			Mark a contiguous range of blocks as allocated.  The cor-
			responding bits in the volume bitmap are set.  Also tests to see
			if any of the blocks were previously unallocated.
	BlockFindContiguous
			Find a contiguous range of blocks of a given size.  The caller
			specifies where to begin the search (by block number).  The
			block number of the first block in the range is returned.  This is only
			called by the bitmap scanning logic as the red-black tree should be able
			to do this internally by searching its tree.
	BlockAllocateAny
			Find and allocate a contiguous range of blocks up to a given size.  The
			first range of contiguous free blocks found are allocated, even if there
			are fewer blocks than requested (and even if a contiguous range of blocks
			of the given size exists elsewhere).
	BlockAllocateAnyBitmap
			Finds a range of blocks per the above requirements without using the
			Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
			any valid range of free space needed.
	BlockAllocateAnyRBTree
			Finds a valid range of blocks per the above requirements by searching
			the red-black tree.  We can just make an internal call to
			BlockAllocateContigRBTree to find the valid range.
	BlockAllocateContig
			Find and allocate a contiguous range of blocks of a given size.  If
			a contiguous range of free blocks of the given size isn't found, then
			the allocation fails (i.e. it is "all or nothing").  This routine is
			essentially a wrapper function around its related sub-functions,
			BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
			respectively, the original HFS+ bitmap scanning logic and the new
			Red-Black Tree to search and manage free-space decisions.  This function
			contains logic for when to use which of the allocation algorithms,
			depending on the free space contained in the volume.
	BlockAllocateContigBitmap
			Finds and allocates a range of blocks specified by the size parameters
			using the original HFS+ bitmap scanning logic.  The red-black tree
			will not be updated if this function is used.
	BlockAllocateContigRBTree
			Finds and allocates a range of blocks specified by the size parameters
			using the new red/black tree data structure and search algorithms
			provided by the tree library.  Updates the red/black tree nodes after
			the on-disk data structure (bitmap) has been updated.
	BlockAllocateKnown
			Try to allocate space from known free space in the volume's
			free extent cache.
	ReadBitmapBlock
			Given an allocation block number, read the bitmap block that
			contains that allocation block into a caller-supplied buffer.
	ReleaseBitmapBlock
			Release a bitmap block back into the buffer cache.

	remove_free_extent_cache
			Remove an extent from the free extent cache.  Handles overlaps
			with multiple extents in the cache, and handles splitting an
			extent in the cache if the extent to be removed is in the middle
			of a cached extent.

	add_free_extent_cache
			Add an extent to the free extent cache.  It will merge the
			input extent with extents already in the cache.

	hfs_isallocated
			Test to see if any blocks in a range are allocated.  Journal or
			allocation file lock must be held.

	hfs_isallocated_scan
			Test to see if any blocks in a range are allocated.  Releases and
			invalidates the block used when finished.

	hfs_isrbtree_active
			Test to see if the allocation red-black tree is live.  This function
			requires either an exclusive or shared lock on the allocation bitmap file
			in the HFS mount structure, to prevent red-black tree pointers from disappearing.

	hfs_isrbtree_allocated
			Test to see if the specified extent is marked as allocated in the red-black tree.
			Multiplexes between the metadata zone trees and the normal allocation zone trees
			depending on the offset of the extent specified.

	check_rbtree_extents
			Void function that wraps around the above function (hfs_isrbtree_allocated)
			and checks to see that the return value was appropriate based on the assertion we're
			trying to validate (whether or not the specified extent should be marked as free
			or allocated).

	hfs_validate_rbtree
			Exhaustive search function that will check every allocation block for its status in the
			red-black tree and then check the corresponding status in the bitmap file.  If the two are out
			of sync, it will panic.  Note that this function is extremely expensive and must NEVER
			be run outside of debug code.

	hfs_checktreelinks
			Checks the embedded linked list structure of the red black tree for integrity.  The next pointer
			should always point to whatever extent_tree_offset_next returns.

Red Black Tree Specific Routines
	GenerateTree
			Build a red-black tree for the given filesystem's bitmap.

	DestroyTrees
			Destroy the tree on the given filesystem

	hfs_alloc_scan_block
			Given a starting allocation block number, figures out which physical block contains that
			allocation block's bit, and scans it from the starting bit until either the ending bit or
			the end of the block.  Free space extents are inserted into the appropriate red-black tree.
*/
#include "../../hfs_macos_defs.h"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <kern/kalloc.h>
/* For VM Page size */
#include <libkern/libkern.h>

#include "../../hfs.h"
#include "../../hfs_dbg.h"
#include "../../hfs_format.h"
#include "../../hfs_endian.h"
#include "../../hfs_macos_defs.h"
#include "../headers/FileMgrInternal.h"
#include "../headers/HybridAllocator.h"
#include "../../hfs_kdebug.h"

/* Headers for unmap-on-mount support */
#include <vfs/vfs_journal.h>
#include <sys/disk.h>
#ifndef CONFIG_HFS_TRIM
#define CONFIG_HFS_TRIM 0
#endif
/*
 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
 * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
 * disabled by default because there can be a lot of these events,
 * and we don't want to overwhelm the kernel debug buffer.  If you
 * want to watch these events in particular, just set the sysctl.)
 */
static int hfs_kdebug_allocation = 0;
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
enum {
	/*
	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
	 * BlockDeallocate, including the internal BlockAllocateXxx
	 * routines so we can see how an allocation was satisfied.
	 *
	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
	 * free extent cache.
	 *
	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
	 *
	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
	 * or clearing bits, scanning the bitmap).
	 */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};
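
/*
 * Usage example (illustrative, not from the original source): to watch
 * both the allocation events and the trim-list events, OR the desired
 * bits together and write them to the sysctl declared above:
 *
 *     sysctl -w vfs.generic.hfs.kdebug.allocation=5   # ALLOC (1) | UNMAP (4)
 */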
enum {
	kBytesPerWord		=  4,
	kBitsPerByte		=  8,
	kBitsPerWord		= 32,

	kBitsWithinWordMask	= kBitsPerWord - 1
};

#define kLowBitInWordMask	0x00000001ul
#define kHighBitInWordMask	0x80000000ul
#define kAllBitsSetInWord	0xFFFFFFFFul
#define ALLOC_DEBUG 0
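
/*
 * Illustrative sketch (not part of the original source): how the word
 * constants above locate a given allocation block's bit.  The bitmap is
 * stored big-endian on disk, so a word is byte-swapped before masking,
 * which is the same arithmetic the scanning loops below use.
 */
#if 0	/* example only */
static int example_block_is_allocated(u_int32_t bigEndianWord, u_int32_t block)
{
	u_int32_t word    = SWAP_BE32 (bigEndianWord);
	u_int32_t bitMask = kHighBitInWordMask >> (block & kBitsWithinWordMask);

	return (word & bitMask) != 0;	/* a set bit means "allocated" */
}
#endif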
static OSErr ReadBitmapBlock(
		ExtendedVCB		*vcb,
		u_int32_t		bit,
		u_int32_t		**buffer,
		uintptr_t		*blockRef);

static OSErr ReleaseBitmapBlock(
		ExtendedVCB		*vcb,
		uintptr_t		blockRef,
		Boolean			dirty);

static OSErr BlockAllocateAny(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateAnyBitmap(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContig(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigBitmap(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockFindContiguous(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		endingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateKnown(
		ExtendedVCB		*vcb,
		u_int32_t		maxBlocks,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockMarkAllocatedInternal (
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		register u_int32_t	numBlocks);

static OSErr BlockMarkFreeInternal(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		numBlocks,
		Boolean			do_validate);

static OSErr ReleaseScanBitmapBlock( struct buf *bp );

static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
		u_int32_t numBlocks, struct jnl_trim_list *list);

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);

static int hfs_alloc_scan_block(struct hfsmount *hfsmp,
		u_int32_t startbit,
		u_int32_t endBit,
		u_int32_t *bitToScan,
		struct jnl_trim_list *list);

int hfs_isallocated_scan (struct hfsmount *hfsmp,
		u_int32_t startingBlock,
		u_int32_t *bp_buf);
#if CONFIG_HFS_ALLOC_RBTREE
static OSErr BlockAllocateAnyRBTree(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigRBTree(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		minBlocks,
		u_int32_t		maxBlocks,
		Boolean			useMetaZone,
		u_int32_t		*actualStartBlock,
		u_int32_t		*actualNumBlocks,
		u_int32_t		forceContig);

static OSErr BlockMarkAllocatedRBTree(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		numBlocks);

static OSErr BlockMarkFreeRBTree(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		numBlocks);

static int hfs_isrbtree_allocated (struct hfsmount * hfsmp,
		u_int32_t startBlock,
		u_int32_t numBlocks,
		extent_node_t **node1);

static void hfs_validate_rbtree (struct hfsmount *hfsmp,
		u_int32_t start,
		u_int32_t end);

static void hfs_checktreelinks (struct hfsmount *hfsmp);

void check_rbtree_extents (struct hfsmount *hfsmp,
		u_int32_t start,
		u_int32_t numBlocks,
		int shouldBeFree);

#define ASSERT_FREE 1
#define ASSERT_ALLOC 0

#endif /* CONFIG_HFS_ALLOC_RBTREE */
/* Functions for manipulating free extent cache */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
/*
 * Validation Routine to verify that the TRIM list maintained by the journal
 * is in good shape relative to what we think the bitmap should have.  We should
 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
 * we panic.
 */
int trim_validate_bitmap (struct hfsmount *hfsmp);
int trim_validate_bitmap (struct hfsmount *hfsmp) {
	u_int64_t blockno_offset;
	u_int64_t numblocks;
	u_int32_t startblk;
	u_int32_t blks;
	int err = 0;
	uint32_t i;
	uint32_t count;
	uint32_t alloccount = 0;

	struct journal *jnl = (struct journal*)hfsmp->jnl;
	if (jnl->active_tr) {
		struct jnl_trim_list *trim = &(jnl->active_tr->trim);
		count = trim->extent_count;
		for (i = 0; i < count; i++) {
			blockno_offset = trim->extents[i].offset;
			blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
			blockno_offset = blockno_offset / hfsmp->blockSize;
			numblocks = trim->extents[i].length / hfsmp->blockSize;

			startblk = (u_int32_t)blockno_offset;
			blks = (u_int32_t) numblocks;
			err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);

			if (err == 0 && alloccount != 0) {
				panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
			}
		}
	}
	return 0;
}
/*
;________________________________________________________________________________
;
; Routine:	hfs_unmap_free_extent
;
; Function:	Make note of a range of allocation blocks that should be
;		unmapped (trimmed).  That is, the given range of blocks no
;		longer has useful content, and the device can unmap the
;		previous contents.  For example, a solid state disk may reuse
;		the underlying storage for other blocks.
;
;		This routine is only supported for journaled volumes.  The extent
;		being freed is passed to the journal code, and the extent will
;		be unmapped after the current transaction is written to disk.
;
; Input Arguments:
;	hfsmp		- The volume containing the allocation blocks.
;	startingBlock	- The first allocation block of the extent being freed.
;	numBlocks	- The number of allocation blocks of the extent being freed.
;________________________________________________________________________________
*/
static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	u_int64_t device_sz;
	int err = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (ALLOC_DEBUG) {
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
		}
	}

	if (hfsmp->jnl != NULL) {
		device_sz = hfsmp->hfs_logical_bytes;
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		/* Validate that the trim is in a valid range of bytes */
		if ((offset >= device_sz) || ((offset + length) > device_sz)) {
			printf("hfs_unmap_free_ext: ignoring trim @ off %lld len %lld \n", offset, length);
		}
		else {
			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
			if (err) {
				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
}
/*
;________________________________________________________________________________
;
; Routine:	hfs_track_unmap_blocks
;
; Function:	Make note of a range of allocation blocks that should be
;		unmapped (trimmed).  That is, the given range of blocks no
;		longer has useful content, and the device can unmap the
;		previous contents.  For example, a solid state disk may reuse
;		the underlying storage for other blocks.
;
;		This routine is only supported for journaled volumes.
;
;		This function should *NOT* be used when the volume is fully
;		mounted.  This function is intended to support a bitmap iteration
;		at mount time to fully inform the SSD driver of the state of all blocks
;		at mount time, and assumes that there is no allocation/deallocation
;		interference during its iteration.
;
; Input Arguments:
;	hfsmp		- The volume containing the allocation blocks.
;	offset		- The first allocation block of the extent being freed.
;	numBlocks	- The number of allocation blocks of the extent being freed.
;	list		- The list of currently tracked trim ranges.
;________________________________________________________________________________
*/
static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
		u_int32_t numBlocks, struct jnl_trim_list *list) {

	u_int64_t offset;
	u_int64_t length;
	int error = 0;

	if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
		int extent_no = list->extent_count;
		offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		list->extents[extent_no].offset = offset;
		list->extents[extent_no].length = length;
		list->extent_count++;
		if (list->extent_count == list->allocated_count) {
			error = hfs_issue_unmap (hfsmp, list);
		}
	}

	return error;
}
/*
;________________________________________________________________________________
;
; Routine:	hfs_issue_unmap
;
; Function:	Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
;
; Input Arguments:
;	hfsmp	- The volume containing the allocation blocks.
;	list	- The list of currently tracked trim ranges.
;________________________________________________________________________________
*/
static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) {
	dk_unmap_t unmap;
	int error = 0;

	if (list->extent_count > 0) {
		bzero(&unmap, sizeof(unmap));
		unmap.extents = list->extents;
		unmap.extentsCount = list->extent_count;

		/* Issue a TRIM and flush them out */
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());

		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
		list->extent_count = 0;
	}

	return error;
}
/*
;________________________________________________________________________________
;
; Routine:	hfs_unmap_alloc_extent
;
; Function:	Make note that a range of allocation blocks, some of
;		which may have previously been passed to hfs_unmap_free_extent,
;		is now in use on the volume.  The given blocks will be removed
;		from any pending DKIOCUNMAP.
;
; Input Arguments:
;	hfsmp		- The volume containing the allocation blocks.
;	startingBlock	- The first allocation block of the extent being allocated.
;	numBlocks	- The number of allocation blocks being allocated.
;________________________________________________________________________________
*/
static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	int err = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (hfsmp->jnl != NULL) {
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
		if (err) {
			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
}
/*
;________________________________________________________________________________
;
; Routine:	hfs_trim_callback
;
; Function:	This function is called when a transaction that freed extents
;		(via hfs_unmap_free_extent/journal_trim_add_extent) has been
;		written to the on-disk journal.  This routine will add those
;		extents to the free extent cache so that they can be reused.
;
;		CAUTION: This routine is called while the journal's trim lock
;		is held shared, so that no other thread can reuse any portion
;		of those extents.  We must be very careful about which locks
;		we take from within this callback, to avoid deadlock.  The
;		call to add_free_extent_cache will end up taking the cache's
;		lock (just long enough to add these extents to the cache).
;
;		CAUTION: If the journal becomes invalid (e.g., due to an I/O
;		error when trying to write to the journal), this callback
;		will stop getting called, even if extents got freed before
;		the journal became invalid!
;
; Input Arguments:
;	arg		- The hfsmount of the volume containing the extents.
;	extent_count	- The number of extents freed in the transaction.
;	extents		- An array of extents (byte ranges) that were freed.
;________________________________________________________________________________
*/
__private_extern__ void
hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
{
	uint32_t i;
	uint32_t startBlock, numBlocks;
	struct hfsmount *hfsmp = arg;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);

	for (i=0; i<extent_count; ++i) {
		/* Convert the byte range in *extents back to a range of allocation blocks. */
		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
		numBlocks = extents[i].length / hfsmp->blockSize;
		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
/*
;________________________________________________________________________________
;
; Routine:	UnmapBlocks
;
; Function:	Traverse the bitmap, and issue DKIOCUNMAPs to the underlying
;		device as needed so that the underlying disk device is as
;		up-to-date as possible with which blocks are unmapped.
;
; Input Arguments:
;	hfsmp	- The volume containing the allocation blocks.
;________________________________________________________________________________
*/
u_int32_t UnmapBlocks (struct hfsmount *hfsmp) {
	u_int32_t blocks_scanned = 0;
	int error = 0;
	struct jnl_trim_list trimlist;
	/*
	 *struct jnl_trim_list {
	 *	uint32_t	allocated_count;
	 *	uint32_t	extent_count;
	 *	dk_extent_t	*extents;
	 * };
	 */
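
	/*
	 * Sizing note (illustrative): dk_extent_t from <sys/disk.h> is a pair
	 * of 64-bit fields (offset, length), i.e. 16 bytes.  Assuming 4 KB
	 * pages, the batch allocated below holds PAGE_SIZE / sizeof(dk_extent_t)
	 * = 4096 / 16 = 256 extents before hfs_track_unmap_blocks() triggers
	 * hfs_issue_unmap() to flush them in a single DKIOCUNMAP.
	 */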
	bzero (&trimlist, sizeof(trimlist));
	if (CONFIG_HFS_TRIM) {
		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
		void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
		if (extents == NULL) {
			return ENOMEM;
		}
		trimlist.extents = (dk_extent_t*)extents;
		trimlist.allocated_count = alloc_count;
		trimlist.extent_count = 0;

		while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
			error = hfs_alloc_scan_block (hfsmp, blocks_scanned, hfsmp->totalBlocks,
					&blocks_scanned, &trimlist);
			if (error) {
				printf("HFS: bitmap unmap scan error: %d\n", error);
			}
		}
		if (error == 0) {
			hfs_issue_unmap(hfsmp, &trimlist);
		}
		if (trimlist.extents) {
			kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
		}
	}
	return error;
}
/*
;________________________________________________________________________________
;
; Routine:	BlockAllocate
;
; Function:	Allocate space on a volume.  If contiguous allocation is requested,
;		at least the requested number of bytes will be allocated or an
;		error will be returned.  If contiguous allocation is not forced,
;		the space will be allocated with the first largest extent available
;		at the requested starting allocation block.  If there is not enough
;		room there, a block allocation of less than the requested size will be
;		returned.
;
;		If the requested starting block is 0 (for new file allocations),
;		the volume's allocation block pointer will be used as a starting
;		point.
;
; Input Arguments:
;	vcb		- Pointer to ExtendedVCB for the volume to allocate space on
;	fcb		- Pointer to FCB for the file for which storage is being allocated
;	startingBlock	- Preferred starting allocation block, 0 = no preference
;	minBlocks	- Number of blocks requested.  If the allocation is non-contiguous,
;			  less than this may actually be allocated
;	maxBlocks	- The maximum number of blocks to allocate.  If there is additional free
;			  space after bytesRequested, then up to maxBlocks blocks should really
;			  be allocated.  (Used by ExtendFileC to round up allocations to a multiple
;			  of the file's clump size.)
;	flags		- Flags to specify options like contiguous, use metadata zone,
;			  skip free block check, etc.
;
; Output:
;	(result)	- Error code, zero for successful allocation
;	*startBlock	- Actual starting allocation block
;	*actualBlocks	- Actual number of allocation blocks allocated
;
; Side effects:
;	The volume bitmap is read and updated; the volume bitmap cache may be changed.
;________________________________________________________________________________
*/
OSErr BlockAllocate (
	ExtendedVCB		*vcb,			/* which volume to allocate space on */
	u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
	u_int32_t		minBlocks,		/* desired number of blocks to allocate */
	u_int32_t		maxBlocks,		/* maximum number of blocks to allocate */
	u_int32_t		flags,			/* option flags */
	u_int32_t		*actualStartBlock,	/* actual first block of allocation */
	u_int32_t		*actualNumBlocks)	/* number of blocks actually allocated; if forceContiguous */
						/* was zero, then this may represent fewer than minBlocks */
{
	u_int32_t		freeBlocks;
	OSErr			err;
	Boolean			updateAllocPtr = false;	// true if nextAllocation needs to be updated
	struct hfsmount		*hfsmp;
	Boolean			useMetaZone;
	Boolean			forceContiguous;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}
	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	//TODO: Figure out when we need to re-enable the RB-Tree.

	//TODO: Make sure we use allocLimit when appropriate.

	/*
	 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
	 * in conjunction with the Generate Tree function.  If the red-black tree does not currently contain
	 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
	 * we find what we need.  We'll update the tree fields when we're done, indicating that we've advanced the
	 * high water mark for the tree.
	 */

	// Initialize outputs in case we get an error
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);
	/* Skip free block check if blocks are being allocated for relocating
	 * data during truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful.  This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		// If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}
	// If caller didn't specify a starting block number, then use the volume's
	// next block to allocate from.
	if (startingBlock == 0) {
		HFS_MOUNT_LOCK(vcb, TRUE);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		}
		else {
			startingBlock = vcb->nextAllocation;
		}
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		updateAllocPtr = true;
	}

	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}
	// If the request must be contiguous, then find a sequence of free blocks
	// that is long enough.  Otherwise, find the first free block.
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
				useMetaZone, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
		    (*actualStartBlock > startingBlock) &&
		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
		     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
#if CONFIG_HFS_ALLOC_RBTREE
		/*
		 * If the RB-Tree Allocator is live, just go straight for a
		 * BlockAllocateAny call and return the result.  Otherwise,
		 * resort to the bitmap scanner.
		 */
		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
			/* Start by trying to allocate from the starting block forward */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, useMetaZone, actualStartBlock,
					actualNumBlocks);

			/*
			 * Because the RB-Tree is live, the previous call to BlockAllocateAny
			 * will use the rbtree variant.  As a result, it will automatically search the
			 * metadata zone for a valid extent if needed.  If we get a return value of
			 * noErr, we found a valid extent and we can skip to the end.  If the error indicates
			 * the disk is full, that's an equally valid return code and we can skip to the end, too.
			 */
			if (err == noErr || err == dskFulErr) {
				goto Exit;
			}
			else {
				//TODO: only tear down tree if the tree is finished building.
				//Make sure to handle the ENOSPC condition properly.  We shouldn't error out in that case.
				/* Tear down tree if we encounter an error */
				if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
					hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
					DestroyTrees(hfsmp);
					ResetVCBFreeExtCache(hfsmp);
				}
				// fall through to the normal allocation since the rb-tree allocation failed.
			}
		}
#endif
		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, useMetaZone, actualStartBlock,
					actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * We may be out of space in the normal zone; go up to the starting block from
			 * the start of the volume.
			 */
			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
					useMetaZone, actualStartBlock,
					actualNumBlocks);
		}
	}
Exit:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	if (*actualNumBlocks != 0) {
		//
		// If we used the volume's roving allocation pointer, then we need to update it.
		// Adding in the length of the current allocation might reduce the next allocate
		// call by avoiding a re-scan of the already allocated space.  However, the clump
		// just allocated can quite conceivably end up being truncated or released when
		// the file is closed or its EOF changed.  Leaving the allocation pointer at the
		// start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		HFS_MOUNT_LOCK(vcb, TRUE);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		if (updateAllocPtr &&
		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
		     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/*
		 * Update the number of free blocks on the volume
		 *
		 * Skip updating the free blocks count if the blocks are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		HFS_MOUNT_UNLOCK(vcb, TRUE);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}

	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}
			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
/*
;________________________________________________________________________________
;
; Routine:	BlockDeallocate
;
; Function:	Update the bitmap to deallocate a run of disk allocation blocks
;
; Input Arguments:
;	vcb		- Pointer to ExtendedVCB for the volume to free space on
;	firstBlock	- First allocation block to be freed
;	numBlocks	- Number of allocation blocks to free up (must be > 0!)
;
; Output:
;	(result)	- Result code
;
; Side effects:
;	The volume bitmap is read and updated; the volume bitmap cache may be changed.
;	The Allocator's red-black trees may also be modified as a result.
;________________________________________________________________________________
*/
OSErr BlockDeallocate (
	ExtendedVCB		*vcb,			// Which volume to deallocate space on
	u_int32_t		firstBlock,		// First block in range to deallocate
	u_int32_t		numBlocks,		// Number of contiguous blocks to deallocate
	u_int32_t		flags)
{
	OSErr			err;
	struct hfsmount		*hfsmp;
	hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	// If no blocks to deallocate, then exit early
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}

	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}

		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}
	}
	/*
	 * If we're using the red-black tree code, then try to free the
	 * blocks by marking them in the red-black tree first.  If the tree
	 * is not active for whatever reason (or we're not using the
	 * R/B Tree code at all), then go straight for the BlockMarkFree
	 * call.
	 *
	 * Remember that we can get into this function if the tree isn't finished
	 * building.  In that case, check to see if the block we're de-allocating is
	 * past the high watermark.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		/*
		 * BlockMarkFreeRBTree deals with the case where we are resizing the
		 * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion
		 * that is currently controlled by the r/b tree.
		 */
		//TODO: Update multiplexing code for the half-finished case.
		err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
		adjustFreeExtCache = 0;
	}
	else {
		err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
	}
#else
	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
#endif
	if (err) {
		goto Exit;
	}
	// Update the volume's free block count, and mark the VCB as dirty.
	HFS_MOUNT_LOCK(vcb, TRUE);

	/*
	 * Do not update the free block count.  This flag is specified
	 * when a volume is being truncated.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);

		/*
		 * In the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache).  Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}

	MarkVCBDirty(vcb);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
static u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
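
/*
 * Worked example (illustrative): a set bit in the bitmap means
 * "allocated", so for the byte 0xA5 (binary 1010 0101) the table yields
 * freebitcount[0xA5 & 0x0F] + freebitcount[(0xA5 >> 4) & 0x0F]
 *   = freebitcount[0x5] + freebitcount[0xA] = 2 + 2 = 4 free blocks,
 * which is exactly the nibble-at-a-time summation MetaZoneFreeBlocks()
 * performs below.
 */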
static u_int32_t
MetaZoneFreeBlocks(ExtendedVCB *vcb)
{
	u_int32_t	freeblocks;
	u_int32_t	*currCache;
	uintptr_t	blockRef;
	u_int32_t	bit;
	u_int32_t	lastbit;
	int		bytesleft;
	int		bytesperblock;
	u_int8_t	byte;
	u_int8_t	*buffer;

	blockRef = 0;
	bytesleft = freeblocks = 0;
	buffer = NULL;

	bit = VCBTOHFS(vcb)->hfs_metazone_start;
	if (bit == 1)
		bit = 0;

	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
	bytesperblock = vcb->vcbVBMIOSize;

	/*
	 *  Count all the bits from bit to lastbit.
	 */
	while (bit < lastbit) {
		/*
		 *  Get next bitmap block.
		 */
		if (bytesleft == 0) {
			if (blockRef) {
				(void) ReleaseBitmapBlock(vcb, blockRef, false);
				blockRef = 0;
			}
			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
				return (0);
			}
			buffer = (u_int8_t *)currCache;
			bytesleft = bytesperblock;
		}
		byte = *buffer++;
		freeblocks += freebitcount[byte & 0x0F];
		freeblocks += freebitcount[(byte >> 4) & 0x0F];
		bit += kBitsPerByte;
		bytesleft--;
	}
	if (blockRef)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	return (freeblocks);
}
/*
 * Obtain the next allocation block (bit) that's
 * outside the metadata allocation zone.
 */
static u_int32_t NextBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit)
{
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
		return (bit);
	/*
	 * Skip over metadata allocation zone.
	 */
	if ((bit >= hfsmp->hfs_metazone_start) &&
	    (bit <= hfsmp->hfs_metazone_end)) {
		bit = hfsmp->hfs_metazone_end + 1;
	}
	return (bit);
}
/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;		block (bit).  Return a pointer to the bitmap block.
;
; Inputs:
;	vcb		-- Pointer to ExtendedVCB
;	bit		-- Allocation block whose bitmap block is desired
;
; Outputs:
;	buffer		-- Pointer to bitmap block corresponding to "block"
;	blockRef
;_______________________________________________________________________
*/
static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef)
{
	OSErr			err;
	struct buf		*bp = NULL;
	struct vnode		*vp = NULL;
	daddr64_t		block;
	u_int32_t		blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
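
	/*
	 * Worked example (illustrative, assuming the common vcbVBMIOSize of
	 * 4096 bytes): one bitmap I/O block then covers 4096 * 8 = 32768
	 * allocation blocks, so allocation block (bit) 100000 is found in
	 * bitmap block 100000 / 32768 = 3.
	 */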
	if (vcb->vcbSigWord == kHFSPlusSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */
	} else /* hfs */ {
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;		/* map to physical block */
	}

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
/*
;_______________________________________________________________________
;
; Routine:	ReleaseBitmapBlock
;
; Function:	Release a bitmap block.
;
; Inputs:
;	vcb
;	blockRef
;	dirty
;_______________________________________________________________________
*/
static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty)
{
	struct buf *bp = (struct buf *)blockRef;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);

	if (blockRef == 0) {
		if (dirty)
			panic("hfs: ReleaseBitmapBlock: missing bp");
		return (0);
	}

	if (bp) {
		if (dirty) {
			// XXXdbg
			struct hfsmount *hfsmp = VCBTOHFS(vcb);

			if (hfsmp->jnl) {
				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
			} else {
				buf_bdwrite(bp);
			}
		} else {
			buf_brelse(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return (0);
}
/*
 * ReleaseScanBitmapBlock is used to release struct bufs that were
 * created for use by bitmap scanning code.  We want to force
 * them to be purged out of the buffer cache ASAP, so we'll release them differently
 * than in the ReleaseBitmapBlock case.  Also, we know that we're only reading
 * the blocks, so we will never dirty them as part of the tree building scan.
 */
static OSErr ReleaseScanBitmapBlock(struct buf *bp) {

	if (bp == NULL) {
		return (0);
	}

	if (bp) {
		/* Mark the buffer invalid if it isn't locked, then release it */
		if ((buf_flags(bp) & B_LOCKED) == 0) {
			buf_markinvalid(bp);
		}
		buf_brelse(bp);
	}

	return (0);
}
/*
_______________________________________________________________________

Routine:	BlockAllocateContig

Function:	Allocate a contiguous group of allocation blocks.  The
		allocation is all-or-nothing.  The caller guarantees that
		there are enough free blocks (though they may not be
		contiguous, in which case this call will fail).

Inputs:
	vcb			Pointer to volume where space is to be allocated
	startingBlock		Preferred first block for allocation
	minBlocks		Minimum number of contiguous blocks to allocate
	maxBlocks		Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/
static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone,
				actualStartBlock, actualNumBlocks, 1);
	}
#endif
	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
}
/*
 * Variant of BlockAllocateContig that uses the original bitmap-searching logic
 */
static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr	err;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	// Find a contiguous group of blocks at least minBlocks long.
	// Determine the number of contiguous blocks available (up
	// to maxBlocks).

	/*
	 * NOTE: If the only contiguous free extent of at least minBlocks
	 * crosses startingBlock (i.e. starts before, ends after), then we
	 * won't find it.  Earlier versions *did* find this case by letting
	 * the second search look past startingBlock by minBlocks.  But
	 * with the free extent cache, this can lead to duplicate entries
	 * in the cache, causing the same blocks to be allocated twice.
	 */
	err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	if (err == dskFulErr && startingBlock != 0) {
		/*
		 * Constrain the endingBlock so we don't bother looking for ranges
		 * that would overlap those found in the previous call.
		 */
		err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
				useMetaZone, actualStartBlock, actualNumBlocks);
	}
	// Now mark those blocks allocated.
	if (err == noErr)
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Variant of BlockAllocateContig that uses the newer red-black tree library
 * in order to manage free space extents.  This will search the red-black tree
 * and return results in the same fashion as BlockAllocateContigBitmap.
 *
 * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
 * as well as BlockAllocateContig.  In order to determine when we should vend contiguous chunks over
 * locality-based-searches, we use the forceContig argument to determine who called us.
 */
static OSErr BlockAllocateContigRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks,
	u_int32_t		forceContig)
{
	OSErr err;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t tempnode;

	bzero (&tempnode, sizeof(extent_node_t));

	/* Begin search at the end of the file, via startingBlock */
	memset (&search_sentinel, 0, sizeof(extent_node_t));
	search_sentinel.offset = startingBlock;

	*actualStartBlock = 0;
	*actualNumBlocks = 0;

	/*
	 * Find the first available extent that satisfies the allocation by searching
	 * from the starting point and moving forward
	 */
	node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);

	if (node) {
		*actualStartBlock = node->offset;
		*actualNumBlocks = node->length;
	}

	/* If we managed to grab at least minBlocks of space, then we're done. */

	if (*actualNumBlocks >= minBlocks) {
		if (*actualNumBlocks > maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}
		/* Check to see if blocks are already marked as in-use */
		if (ALLOC_DEBUG) {
			REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
			if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
				printf("bad node: %p, offset %d, length %d\n", node, node->offset, node->length);
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
					*actualStartBlock, *actualNumBlocks);
			}
		}

		/*
		 * BlockMarkAllocatedRBTree is responsible for removing the nodes
		 * from the red-black tree after the bitmap has been updated on-disk.
		 */
		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
		if (err == noErr) {

			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						*actualStartBlock, *actualNumBlocks);
				}
				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
			}
		}

		return err;
	}
	/*
	 * We may have failed to grow at the end of the file.  We'll try to find
	 * appropriate free extents, searching by size in the normal allocation zone.
	 *
	 * However, if we're allocating on behalf of a sparse device that hasn't explicitly
	 * requested a contiguous chunk, then we try to search by offset, even if it
	 * means fragmenting the file.  We want all available entries starting
	 * from the front of the disk to avoid creating new bandfiles.  As a result,
	 * we'll start by searching the offset tree rather than the normal length
	 * tree.  Note that this function can be invoked from BlockAllocateAny, in
	 * which the minimum block size is 1 block, making it easy to succeed.
	 */
	search_sentinel.offset = hfsmp->hfs_metazone_end;
	search_sentinel.length = minBlocks;

	if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) {
		/* just start with the first offset node */
		node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
	}
	else {
		/*
		 * Otherwise, start from the end of the metadata zone or our next allocation pointer,
		 * and try to find the first chunk of size >= min.
		 */
		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);

		if (node == NULL) {
			extent_node_t *metaend_node;
			/*
			 * Maybe there's a free extent coalesced with the space still in the metadata
			 * zone.  If there is, find it and allocate from the middle of it, starting at
			 * the end of the metadata zone.
			 *
			 * If search_prev yields a result that is not offset == metazone_end, then that
			 * means no node existed at that offset.  If the previous node's offset + length crosses
			 * the metazone boundary, then allocate from there.  If it is too small to
			 * cross the metazone boundary, then it is of no importance and we'd have to
			 * look elsewhere.
			 */
			metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);

			if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) {
				u_int32_t node_end = metaend_node->offset + metaend_node->length;
				if (node_end > hfsmp->hfs_metazone_end) {
					u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end;
					if (modified_length >= minBlocks) {
						/*
						 * Then we can allocate it.  Fill in the contents into tempnode,
						 * and BlockMarkAllocatedRBTree below will take care of the rest.
						 */
						tempnode.offset = hfsmp->hfs_metazone_end;
						tempnode.length = MIN(minBlocks, node_end - tempnode.offset);
						node = &tempnode;
					}
				}
			}
		}
	}
	/* If we can't find anything useful, search the metadata zone as a last resort. */

	if ((!node) && useMetaZone) {
		search_sentinel.offset = 0;
		search_sentinel.length = minBlocks;
		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
	}

	/* If we found something useful, then go ahead and update the bitmap */
	if ((node) && (node->length >= minBlocks)) {
		*actualStartBlock = node->offset;
		if (node->length >= maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}
		else {
			*actualNumBlocks = node->length;
		}

		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);

		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						*actualStartBlock, *actualNumBlocks);
				}
				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
			}
		}
	}
	else {
		int destroy_trees = 0;
		/*
		 * TODO: Add High-water mark check here.  If we couldn't find anything useful,
		 * when do we tear down the tree?  Or should the logic be in BlockAllocateContig??
		 */
		if (destroy_trees) {
			DestroyTrees(VCBTOHFS(vcb));
			/* Reset the Free Ext Cache since we'll be using it now. */
			ResetVCBFreeExtCache(VCBTOHFS(vcb));
		}

		if (ALLOC_DEBUG) {
			printf("HFS allocator: No space on FS (%s). Node %p Start %d Min %d, Max %d, Tree still alive.\n",
				hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks);

			/* Dump the list ? */
			extent_tree_offset_print(&hfsmp->offset_tree);

			printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n",
				hfsmp->vcbVN, minBlocks, maxBlocks);
		}
		err = dskFulErr;
	}
	if (err == noErr) {
		if (ALLOC_DEBUG) {
			if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
				panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	return err;
}
#endif
/*
_______________________________________________________________________

Routine:	BlockAllocateAny

Function:	Allocate one or more allocation blocks.  If there are fewer
		free blocks than requested, all free blocks will be
		allocated.  The caller guarantees that there is at least
		one free block.

Inputs:
	vcb			Pointer to volume where space is to be allocated
	startingBlock		Preferred first block for allocation
	endingBlock		Last block to check + 1
	maxBlocks		Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/
/*
 * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree
 * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic.
 */
static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	}
#endif
	return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
}
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * BlockAllocateAnyRBTree finds one or more allocation blocks by using
 * the red-black allocation tree to figure out where the free ranges are.
 * This function is typically used as a last resort because we were unable to
 * find the right ranges.  Outputs are the same as BlockAllocateAnyBitmap.
 */
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;

	/*
	 * BlockAllocateContig
	 */
	/* If we're using the red-black tree, try searching at the specified offsets. */
	err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone,
			actualStartBlock, actualNumBlocks, 0);
	return err;
}
#endif
/*
 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
 * where the free allocation blocks are.  Inputs and outputs are the same as for
 * BlockAllocateAny and BlockAllocateAnyRBTree.
 */
static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB			*vcb,
	u_int32_t			startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t			maxBlocks,
	Boolean				useMetaZone,
	u_int32_t			*actualStartBlock,
	u_int32_t			*actualNumBlocks)
{
	OSErr				err;
	register u_int32_t	block;			//	current block number
	register u_int32_t	currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t			*buffer = NULL;
	u_int32_t			*currCache = NULL;
	uintptr_t			blockRef;
	u_int32_t			bitsPerBlock;
	u_int32_t			wordsPerBlock;
	Boolean				dirty = false;
	struct hfsmount		*hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else {
				err = dskFulErr;
				goto Exit;
			}
		}
	}

	//	Since this routine doesn't wrap around
	if (maxBlocks > (endingBlock - startingBlock)) {
		maxBlocks = endingBlock - startingBlock;
	}

	//
	//	Pre-read the first bitmap block
	//
	err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
	if (err != noErr) goto Exit;
	buffer = currCache;
	//
	//	Set up the current position within the block
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		buffer += wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
		currentWord = SWAP_BE32 (*buffer);
		bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
	}
	//
	//	Find the first unallocated block
	//
	block = startingBlock;
	while (block < endingBlock) {
		if ((currentWord & bitMask) == 0)
			break;

		//	Next bit
		++block;
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					block = NextBitmapBlock(vcb, block);
				}
				if (block >= endingBlock) {
					err = dskFulErr;
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;

				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}
	//	Did we get to the end of the bitmap before finding a free block?
	//	If so, then couldn't allocate anything.
	if (block >= endingBlock) {
		err = dskFulErr;
		goto Exit;
	}

	//	Return the first block in the allocated range
	*actualStartBlock = block;
	dirty = true;

	//	If we could get the desired number of blocks before hitting endingBlock,
	//	then adjust endingBlock so we won't keep looking.  Ideally, the comparison
	//	would be (block + maxBlocks) < endingBlock, but that could overflow.  The
	//	comparison below yields identical results, but without overflow.
	if (block < (endingBlock-maxBlocks)) {
		endingBlock = block + maxBlocks;	//	if we get this far, we've found enough
	}
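	/*
	 * Worked example of the overflow-safe comparison above (hypothetical
	 * values, not from the original source): suppose endingBlock = 0xFFFFFFF0,
	 * maxBlocks = 0x20, and block = 0xFFFFFFE0.  The naive test
	 * (block + maxBlocks) < endingBlock would compute 0xFFFFFFE0 + 0x20,
	 * which wraps to 0 in 32 bits and would wrongly pass.  The rewritten test
	 * compares block against endingBlock - maxBlocks == 0xFFFFFFD0, which
	 * cannot wrap here because maxBlocks was already clamped to
	 * (endingBlock - startingBlock), and it correctly fails for this block.
	 */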
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}
	//
	//	Allocate all of the consecutive blocks
	//
	while ((currentWord & bitMask) == 0) {
		//	Allocate this block
		currentWord |= bitMask;

		//	Move to the next block.  If no more, then exit.
		++block;
		if (block == endingBlock)
			break;

		//	Next bit
		bitMask >>= 1;
		if (bitMask == 0) {
			*buffer = SWAP_BE32 (currentWord);	//	update value in bitmap

			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, true);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, block);
					if (nextBlock != block) {
						goto Exit;	/* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;

				if (hfsmp->jnl) {
					journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
				}

				wordsLeft = wordsPerBlock;
			}

			currentWord = SWAP_BE32 (*buffer);
		}
	}
	*buffer = SWAP_BE32 (currentWord);	//	update the last change
Exit:
	if (err == noErr) {
		//	Figure out how many contiguous blocks were allocated
		*actualNumBlocks = block - *actualStartBlock;

		//	Sanity check for overflow
		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
			panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}

		/*
		 * Because this function directly manipulates the bitmap to mark the
		 * blocks it came across as allocated, we must inform the journal (and
		 * subsequently, the journal's trim list) that we are allocating these
		 * blocks, just like in BlockMarkAllocatedInternal.  hfs_unmap_alloc_extent
		 * and the functions it calls will serialize behind the journal trim list lock
		 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to
		 * us manipulating the trim list, or we get there first and successfully remove
		 * these bitmap blocks before the TRIM happens.
		 */
		hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks);
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, dirty);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
/*
_______________________________________________________________________

Routine:	BlockAllocateKnown

Function:	Try to allocate space from known free space in the free
			extent cache.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	maxBlocks		Maximum number of contiguous blocks to allocate

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error

Returns:
	dskFulErr		Free extent cache is empty
_______________________________________________________________________
*/
static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	u_int32_t		foundBlocks;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);

	HFS_MOUNT_LOCK(vcb, TRUE);
	lck_spin_lock(&vcb->vcbFreeExtLock);
	if ((hfs_isrbtree_active(vcb) == true) ||
		vcb->vcbFreeExtCnt == 0 ||
		vcb->vcbFreeExt[0].blockCount == 0) {
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
		return dskFulErr;
	}
	lck_spin_unlock(&vcb->vcbFreeExtLock);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	lck_spin_lock(&vcb->vcbFreeExtLock);

	//	Just grab up to maxBlocks of the first (largest) free extent.
	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
	foundBlocks = vcb->vcbFreeExt[0].blockCount;
	if (foundBlocks > maxBlocks)
		foundBlocks = maxBlocks;
	*actualNumBlocks = foundBlocks;

	lck_spin_unlock(&vcb->vcbFreeExtLock);

	remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);

	//	Sanity check
	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
	{
		printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
		err = EIO;
	}
	else
	{
		//
		//	Now mark the found extent in the bitmap
		//
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
	}

	sanity_check_free_ext(vcb, 0);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
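/*
 * Illustrative sketch of BlockAllocateKnown's fast path (hypothetical
 * numbers, not from the original source): if vcbFreeExt[0] holds
 * { startBlock = 1000, blockCount = 500 } and the caller asks for
 * maxBlocks = 200, the routine returns *actualStartBlock = 1000 and
 * *actualNumBlocks = 200; remove_free_extent_cache() is then expected to
 * trim the cached extent to { 1200, 300 } before the bits are set on disk
 * by BlockMarkAllocatedInternal.
 */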
/*
 * BlockMarkAllocated
 *
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 */
OSErr BlockMarkAllocated(
	ExtendedVCB			*vcb,
	u_int32_t			startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;

	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp)) {
		int err;

		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/*
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree.
			 */
			goto justbitmap;
		}

		err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks);
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						startingBlock, numBlocks);
				}
				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC);
			}
		}
		return err;
	}
justbitmap:
#endif

	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
}
/*
_______________________________________________________________________

Routine:	BlockMarkAllocatedInternal

Function:	Mark a contiguous group of blocks as allocated (set in the
			bitmap).  It assumes those bits are currently marked
			deallocated (clear in the bitmap).  Note that this function
			must be called regardless of whether or not the bitmap or
			tree-based allocator is used, as all allocations must correctly
			be marked on-disk.  If the tree-based approach is running, then
			this will be done before the node is removed from the tree.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	First block number to mark as allocated
	numBlocks		Number of blocks to mark as allocated
_______________________________________________________________________
*/
OSErr BlockMarkAllocatedInternal (
	ExtendedVCB			*vcb,
	u_int32_t			startingBlock,
	register u_int32_t	numBlocks)
{
	OSErr				err;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t			firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t			numBits;		//	Number of bits in word to allocate
	u_int32_t			*buffer = NULL;
	uintptr_t			blockRef;
	u_int32_t			bitsPerBlock;
	u_int32_t			wordsPerBlock;
	struct hfsmount		*hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//
	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;

	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}
	//
	//	If the first block to allocate doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to allocate

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}
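	/*
	 * Worked example of the partial-word mask math above (hypothetical
	 * values, not from the original source): with kBitsPerWord == 32,
	 * firstBit == 29 and numBlocks == 2:
	 *   bitMask = kAllBitsSetInWord >> 29            == 0x00000007
	 *   numBits = 32 - 29                            == 3, clamped to 2
	 *   bitMask &= ~(kAllBitsSetInWord >> (29 + 2))  -> 0x00000006
	 * so exactly the two bits for blocks 29 and 30 of this word get set.
	 */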
	//
	//	Allocate whole words (32 blocks) at a time.
	//

	bitMask = kAllBitsSetInWord;	//	put this in a register for 68K
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;	//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
		*currentWord = SWAP_BE32 (bitMask);
		numBlocks -= kBitsPerWord;

		++currentWord;	//	move to next word
		--wordsLeft;	//	one less word left in this block
	}

	//
	//	Allocate any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;	//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
		*currentWord |= SWAP_BE32 (bitMask);	//	set the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This is a wrapper function around BlockMarkAllocated.  This function is
 * called when the RB Tree-based allocator needs to mark a block as in-use.
 * This function should take the locks that would not normally be
 * necessary for the normal bitmap allocator, and then call the function.  Once
 * the on-disk data structures are updated properly, then this will remove the
 * appropriate node from the tree.
 */
static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks)
{
	OSErr err;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	int rb_err = 0;

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
				startingBlock, numBlocks);
		}
		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
	}

	err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks);

	if (err == noErr) {
		if (ALLOC_DEBUG) {
			if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n",
					startingBlock, numBlocks);
			}
		}

		/*
		 * Mark the blocks in the offset tree.
		 */
		rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock);
		if (rb_err) {
			if (ALLOC_DEBUG) {
				printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err);
			}

			/*
			 * We may be called from the BlockMarkAllocated interface, in which case, they would
			 * not be picking extents from their start.  Do a check here, find if the specified
			 * extent is free, and if it is, then find the containing node.
			 */
			extent_node_t *node = NULL;
			extent_node_t search_sentinel;
			search_sentinel.offset = startingBlock;

			node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);

			if (node) {
				rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock);
			}

			if (rb_err) {
				printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err);
			}
		}
		if (ALLOC_DEBUG) {
			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
		}
	}

	/*
	 * If we encountered a red-black tree error, for now, we immediately back off and force
	 * destruction of rb-tree.  Set the persistent error-detected bit in the mount point.
	 * That will ensure that even if we reach a low-water-mark in the future we will still
	 * not allow the rb-tree to be used.  On next mount, we will force a re-construction from
	 * on-disk state.  As a fallback, we will now resort to the bitmap-scanning behavior.
	 */
	if (rb_err) {
		/* Mark RB-Trees with error */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
		DestroyTrees(hfsmp);
		/* Reset the Free Ext Cache since we'll be using it now. */
		ResetVCBFreeExtCache(hfsmp);
		printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n");
	}

	return err;
}
#endif
/*
 * BlockMarkFree
 *
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as freed.  It will mark the blocks in the red-black tree if appropriate.  We need to do
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 */
OSErr BlockMarkFree(
	ExtendedVCB			*vcb,
	u_int32_t			startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp)) {
		int err;

		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/*
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree.
			 */
			goto justbitmap;
		}

		err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks);
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
				if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n",
						startingBlock, numBlocks);
				}
				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE);
			}
		}
		return err;
	}
justbitmap:
#endif
	return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
}
/*
 * BlockMarkFreeUnused
 *
 * Scan the bitmap block beyond end of current file system for bits
 * that are marked as used.  If any of the bits are marked as used,
 * this function marks them free.
 *
 * Note:  This was specifically written to mark all bits beyond
 * end of current file system during hfs_extendfs(), which makes
 * sure that all the new blocks added to the file system are
 * marked as free.  We expect that all the blocks beyond end of
 * current file system are always marked as free, but there might
 * be cases where they are marked as used.  This function assumes that
 * the number of blocks marked as used incorrectly is relatively
 * small, otherwise this can overflow the journal transaction size
 * on certain file system configurations (example, large unused
 * bitmap with relatively small journal).
 *
 * Input:
 * 	startingBlock: First block of the range to mark unused
 * 	numBlocks: Number of blocks in the range to mark unused
 *
 * Returns: zero on success, non-zero on error.
 */
OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
{
	OSErr error = 0;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	u_int32_t curNumBlocks;
	u_int32_t bitsPerBlock;
	u_int32_t lastBit;

	/* Use the optimal bitmap I/O size instead of bitmap block size */
	bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;

	/*
	 * First clear any non bitmap allocation block aligned bits
	 *
	 * Calculate the first bit in the bitmap block next to
	 * the bitmap block containing the bit for startingBlock.
	 * Using this value, we calculate the total number of
	 * bits to be marked unused from startingBlock to the
	 * end of bitmap block containing startingBlock.
	 */
	lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
	curNumBlocks = lastBit - startingBlock;
	if (curNumBlocks > numBlocks) {
		curNumBlocks = numBlocks;
	}
	error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
	if (error) {
		return error;
	}
	startingBlock += curNumBlocks;
	numBlocks -= curNumBlocks;

	/*
	 * Check a full bitmap block for any 'used' bit.  If any bit is used,
	 * mark all the bits only in that bitmap block as free.  This ensures
	 * that we do not write unmodified bitmap blocks and do not
	 * overwhelm the journal.
	 *
	 * The code starts by checking full bitmap block at a time, and
	 * marks entire bitmap block as free only if any bit in that bitmap
	 * block is marked as used.  In the end, it handles the last bitmap
	 * block which might be partially full by only checking till the
	 * caller-specified last bit and if any bit is set, only mark that
	 * range as free.
	 */
	while (numBlocks) {
		if (numBlocks >= bitsPerBlock) {
			curNumBlocks = bitsPerBlock;
		} else {
			curNumBlocks = numBlocks;
		}
		if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
			error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
			if (error) {
				return error;
			}
		}
		startingBlock += curNumBlocks;
		numBlocks -= curNumBlocks;
	}

	return error;
}
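/*
 * Worked example of the alignment math in BlockMarkFreeUnused (hypothetical
 * values, not from the original source): with vcbVBMIOSize == 4096 bytes,
 * bitsPerBlock == 4096 * 8 == 32768.  For startingBlock == 40000:
 *   lastBit      = ((40000 + 32767) / 32768) * 32768 == 65536
 *   curNumBlocks = 65536 - 40000                     == 25536
 * so the first BlockMarkFreeInternal() call covers blocks 40000..65535, and
 * every subsequent loop iteration works on one full 32768-bit bitmap block.
 */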
/*
_______________________________________________________________________

Routine:	BlockMarkFreeInternal

Function:	Mark a contiguous group of blocks as free (clear in the
			bitmap).  It assumes those bits are currently marked
			allocated (set in the bitmap).

Inputs:
	vcb				Pointer to volume where space is to be freed
	startingBlock	First block number to mark as freed
	numBlocks		Number of blocks to mark as freed
	do_validate		If true, validate that the blocks being
					deallocated are within totalBlocks for the
					current volume and that they were allocated
					before marking them free.
_______________________________________________________________________
*/
OSErr BlockMarkFreeInternal(
	ExtendedVCB			*vcb,
	u_int32_t			startingBlock_in,
	register u_int32_t	numBlocks_in,
	Boolean				do_validate)
{
	OSErr		err;
	u_int32_t	startingBlock = startingBlock_in;
	u_int32_t	numBlocks = numBlocks_in;
	uint32_t	unmapStart = startingBlock_in;
	uint32_t	unmapCount = numBlocks_in;
	uint32_t	wordIndexInBlock;
	u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t	currentBit;		//	Bit index within word of current bit to allocate
	u_int32_t	numBits;		//	Number of bits in word to allocate
	u_int32_t	*buffer = NULL;
	uintptr_t	blockRef;
	u_int32_t	bitsPerBlock;
	u_int32_t	wordsPerBlock;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);

	/*
	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
	 * need to be able to free blocks being relocated during hfs_truncatefs.
	 */
	if ((do_validate == true) &&
		(startingBlock + numBlocks > vcb->totalBlocks)) {
		if (ALLOC_DEBUG) {
			panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
		}

		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		err = EIO;
		goto Exit;
	}
	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;

	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}
	//
	//	Figure out how many bits and words per bitmap block.
	//
	bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;

	//
	//	Look for a range of free blocks immediately before startingBlock
	//	(up to the start of the current bitmap block).  Set unmapStart to
	//	the first free block.
	//
	currentWord = buffer + wordIndexInBlock;
	currentBit = startingBlock % kBitsPerWord;
	bitMask = kHighBitInWordMask >> currentBit;
	while (true) {
		//	Move currentWord/bitMask back by one bit
		bitMask <<= 1;
		if (bitMask == 0) {
			if (--currentWord < buffer)
				break;
			bitMask = kLowBitInWordMask;
		}

		if (*currentWord & SWAP_BE32(bitMask))
			break;	//	Found an allocated block.  Stop searching.
		--unmapStart;
		++unmapCount;
	}
	//
	//	If the first block to free doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	currentWord = buffer + wordIndexInBlock;
	wordsLeft = wordsPerBlock - wordIndexInBlock;
	currentBit = startingBlock % kBitsPerWord;
	if (currentBit != 0) {
		bitMask = kAllBitsSetInWord >> currentBit;	//	turn off all bits before currentBit
		numBits = kBitsPerWord - currentBit;		//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));	//	turn off bits after last
		}
		if ((do_validate == true) &&
			(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to free

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}
	//
	//	Free whole words (32 blocks) at a time.
	//

	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;	//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((do_validate == true) &&
			(*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
			goto Corruption;
		}
		*currentWord = 0;	//	clear the entire word
		numBlocks -= kBitsPerWord;

		++currentWord;	//	move to next word
		--wordsLeft;	//	one less word left in this block
	}
	//
	//	Free any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;	//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((do_validate == true) &&
			(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);	//	clear the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}
	//
	//	Look for a range of free blocks immediately after the range we just freed
	//	(up to the end of the current bitmap block).
	//
	wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
	wordsLeft = wordsPerBlock - wordIndexInBlock;
	currentWord = buffer + wordIndexInBlock;
	currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
	bitMask = kHighBitInWordMask >> currentBit;
	while (true) {
		//	Move currentWord/bitMask/wordsLeft forward one bit
		bitMask >>= 1;
		if (bitMask == 0) {
			if (--wordsLeft == 0)
				break;
			++currentWord;
			bitMask = kHighBitInWordMask;
		}

		if (*currentWord & SWAP_BE32(bitMask))
			break;	//	Found an allocated block.  Stop searching.
		++unmapCount;
	}
Exit:

	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);
	if (err == noErr) {
		hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;

Corruption:
	if (ALLOC_DEBUG) {
		panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
	} else {
		printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		err = EIO;
		goto Exit;
	}
}
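/*
 * Illustrative example of the unmapStart/unmapCount extension above
 * (hypothetical values, not from the original source): if blocks 96..99 and
 * 132..159 are already free within the same bitmap I/O block and the caller
 * frees blocks 100..131, the backward scan moves unmapStart to 96 and the
 * forward scan grows unmapCount to 64, so hfs_unmap_free_extent() can issue
 * a single TRIM covering the whole contiguous free run 96..159.
 */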
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This is a wrapper function around BlockMarkFree.  This function is
 * called when the RB Tree-based allocator needs to mark a block as no longer
 * in use.  This function should take the locks that would not normally be
 * necessary for the normal bitmap deallocator, and then call the function.  Once
 * the on-disk data structures are updated properly, then this will update an
 * existing rb-tree node if possible, or else create a new one.
 */
OSErr BlockMarkFreeRBTree(
	ExtendedVCB			*vcb,
	u_int32_t			startingBlock,
	register u_int32_t	numBlocks)
{
	OSErr err;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	int rb_err = 0;

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
		if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n",
				startingBlock, numBlocks);
		}
		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
	}

	err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);

	if (err == noErr) {

		/*
		 * During a filesystem truncation, we may need to relocate files out of the
		 * portion of the bitmap that is no longer controlled by the r/b tree.
		 * In this case, just update the bitmap and do not attempt to manipulate the tree.
		 */
		if ((startingBlock >= hfsmp->offset_block_end) &&
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			goto free_error;
		}

		extent_node_t *newnode;

		if (ALLOC_DEBUG) {
			/*
			 * Validate that the blocks in question are not allocated in the bitmap, and that they're
			 * not in the offset tree, since it should be tracking free extents, rather than allocated
			 * extents.
			 */
			if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n",
					startingBlock, numBlocks);
			}
		}

		if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) {
			if (startingBlock >= hfsmp->offset_block_end) {
				/*
				 * If the tree generation code has not yet finished scanning the
				 * bitmap region containing this extent, do nothing.  If the start
				 * of the range to be deallocated is greater than the current high
				 * watermark on the offset tree, just bail out and let the scanner catch up with us.
				 */
				rb_err = 0;
				goto free_error;
			}
		}

		newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock);
		if (newnode == NULL) {
			rb_err = 1;
			goto free_error;
		}

		if (ALLOC_DEBUG) {
			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
		}
	}

free_error:
	/*
	 * We follow the same principle as in BlockMarkAllocatedRB.
	 * If we encounter an error in adding the extents to the rb-tree, then immediately
	 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags
	 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild.
	 */
	if (rb_err) {
		/* Mark RB-Trees with error */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
		DestroyTrees(hfsmp);
		/* Reset the Free Ext Cache since we'll be using it now. */
		ResetVCBFreeExtCache(hfsmp);
		printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n");
	}

	return err;
}
#endif
/*
_______________________________________________________________________

Routine:	BlockFindContiguous

Function:	Find a contiguous range of blocks that are free (bits
			clear in the bitmap).  If a contiguous range of the
			minimum size can't be found, an error will be returned.
			This is only needed to support the bitmap-scanning logic,
			as the red-black tree should be able to do this by internally
			searching its list of free extents.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block of range
	endingBlock		Last possible block in range + 1
	minBlocks		Minimum number of blocks needed.  Must be > 0.
	maxBlocks		Maximum (ideal) number of blocks desired
	useMetaZone		OK to dip into metadata allocation zone

Outputs:
	actualStartBlock	First block of range found, or 0 if error
	actualNumBlocks		Number of blocks found, or 0 if error

Returns:
	noErr			Found at least minBlocks contiguous
	dskFulErr		No contiguous space found, or all less than minBlocks
_______________________________________________________________________
*/
static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr				err;
	register u_int32_t	currentBlock;	//	Block we're currently looking at.
	u_int32_t			firstBlock;		//	First free block in current extent.
	u_int32_t			stopBlock;		//	If we get to this block, stop searching for first free block.
	u_int32_t			foundBlocks;	//	Number of contiguous free blocks in current extent.
	u_int32_t			*buffer = NULL;
	register u_int32_t	*currentWord;
	register u_int32_t	bitMask;
	register u_int32_t	wordsLeft;
	register u_int32_t	tempWord;
	uintptr_t			blockRef;
	u_int32_t			wordsPerBlock;
	u_int32_t			updated_free_extent = 0;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else
				goto DiskFull;
		}
	}

	if ((endingBlock - startingBlock) < minBlocks)
	{
		//	The set of blocks we're checking is smaller than the minimum number
		//	of blocks, so we couldn't possibly find a good range.
		goto DiskFull;
	}

	stopBlock = endingBlock - minBlocks + 1;
	currentBlock = startingBlock;
	firstBlock = 0;

	/*
	 * Skip over metadata blocks.
	 */
	if (!useMetaZone)
		currentBlock = NextBitmapBlock(vcb, currentBlock);

	//
	//	Pre-read the first bitmap block.
	//
	err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
	if ( err != noErr ) goto ErrorExit;

	//
	//	Figure out where currentBlock is within the buffer.
	//
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

	wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);	// Current index into buffer
	currentWord = buffer + wordsLeft;
	wordsLeft = wordsPerBlock - wordsLeft;
	do
	{
		foundBlocks = 0;

		//============================================================
		//	Look for a free block, skipping over allocated blocks.
		//============================================================

		//
		//	Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);		//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (tempWord & bitMask)
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUnused;

			//	Didn't find any unused bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < stopBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					currentBlock = NextBitmapBlock(vcb, currentBlock);
					if (currentBlock >= stopBlock) {
						goto LoopExit;
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are clear
			if ((tempWord = SWAP_BE32(*currentWord)) + 1)	//	non-zero if any bits were clear
			{
				//	Figure out which bit is clear
				bitMask = kHighBitInWordMask;
				while (tempWord & bitMask)
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;	//	Found the free bit; break out to FoundUnused.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
		}

FoundUnused:
		//	Make sure the unused bit is early enough to use
		if (currentBlock >= stopBlock)
		{
			break;
		}

		//	Remember the start of the extent
		firstBlock = currentBlock;
		//============================================================
		//	Count the number of contiguous free blocks.
		//============================================================

		//
		//	Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);		//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (bitMask && !(tempWord & bitMask))
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
			if (bitMask)
				goto FoundUsed;

			//	Didn't find any used bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < endingBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, currentBlock);
					if (nextBlock != currentBlock) {
						goto LoopExit;	/* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;

				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}

			//	See if any of the bits are set
			if ((tempWord = SWAP_BE32(*currentWord)) != 0)
			{
				//	Figure out which bit is set
				bitMask = kHighBitInWordMask;
				while (!(tempWord & bitMask))
				{
					bitMask >>= 1;
					++currentBlock;
				}

				break;	//	Found the used bit; break out to FoundUsed.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;

			//	If we found at least maxBlocks, we can quit early.
			if ((currentBlock - firstBlock) >= maxBlocks)
				break;
		}

FoundUsed:
		//	Make sure we didn't run out of bitmap looking for a used block.
		//	If so, pin to the end of the bitmap.
		if (currentBlock > endingBlock)
			currentBlock = endingBlock;

		//	Figure out how many contiguous free blocks there were.
		//	Pin the answer to maxBlocks.
		foundBlocks = currentBlock - firstBlock;
		if (foundBlocks > maxBlocks)
			foundBlocks = maxBlocks;
		if (foundBlocks >= minBlocks)
			break;	//	Found what we needed!
		/* We did not find the total blocks we were looking for, but
		 * let's add this free block run to our free extent cache list.
		 */
		updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);

	} while (currentBlock < stopBlock);
LoopExit:
	//	Return the outputs.
	if (foundBlocks < minBlocks)
	{
DiskFull:
		err = dskFulErr;
ErrorExit:
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	else
	{
		err = noErr;
		*actualStartBlock = firstBlock;
		*actualNumBlocks = foundBlocks;
		/*
		 * Sanity check for overflow
		 */
		if ((firstBlock + foundBlocks) > vcb->allocLimit) {
			panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
				vcb->vcbVN, startingBlock, endingBlock, currentBlock,
				firstBlock, stopBlock, minBlocks, foundBlocks);
		}
	}

	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
		int i;
		u_int32_t min_start = vcb->totalBlocks;

		//	set the nextAllocation pointer to the smallest free block number
		//	we've seen so on the next mount we won't rescan unnecessarily
		lck_spin_lock(&vcb->vcbFreeExtLock);
		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
			if (vcb->vcbFreeExt[i].startBlock < min_start) {
				min_start = vcb->vcbFreeExt[i].startBlock;
			}
		}
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		if (min_start != vcb->totalBlocks) {
			if (min_start < vcb->nextAllocation) {
				vcb->nextAllocation = min_start;
			}
			if (min_start < vcb->sparseAllocation) {
				vcb->sparseAllocation = min_start;
			}
		}
	}

	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
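/*
 * Note on the (tempWord + 1) test used in the scan loops above (an
 * illustrative observation, not original commentary): a word whose 32 blocks
 * are all allocated reads as 0xFFFFFFFF, and 0xFFFFFFFF + 1 wraps to 0 in
 * 32-bit unsigned arithmetic, so the expression is nonzero exactly when at
 * least one bit (i.e. one free block) is clear in the word.
 */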
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Wrapper function around hfs_isrbtree_allocated.  This just takes the start offset,
 * and the number of blocks, and whether or not we should check if the blocks are
 * free or not.  This function is designed to be used primarily with the debug #ifdef
 * enabled, so it results in a panic if anything unexpected occurs.
 *
 * shouldBeFree will be nonzero if the caller expects the zone to be free.
 */
void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks,
		u_int32_t numBlocks, int shouldBeFree) {
	int alloc;
	extent_node_t *node1 = NULL;
	u_int32_t off1 = 0;
	u_int32_t len1 = 0;
	alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1);

	if (node1) {
		off1 = node1->offset;
		len1 = node1->length;
	}

	if (shouldBeFree) {
		/*
		 * If the region should be free, then we expect to see extents in the tree
		 * matching this start and length.  Alloc != 0 means some portion of the extent
		 * specified was allocated.
		 */
		if (alloc != 0) {
			panic ("HFS check_rbtree_extents: Node (%p) do not exist! "
				"node1 off (%d),len(%d),, start(%d) end(%d)\n",
				node1, off1, len1, startBlocks, numBlocks);
		}
	}
	else {
		/*
		 * Otherwise, this means that the region should be allocated, and if we find
		 * an extent matching it, that's bad.
		 */
		if (alloc == 0) {
			panic ("HFS check_rbtree_extents: Node (%p) exists! "
				"node1 off (%d),len(%d), start(%d) end(%d)\n",
				node1, off1, len1, startBlocks, numBlocks);
		}
	}
}
#endif
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Exhaustive validation search.  This function iterates over all allocation blocks and
 * compares their status in the red-black tree vs. the allocation bitmap.  If the two are out of sync
 * then it will panic.  Bitmap lock must be held while this function is run.
 *
 * Because this function requires a red-black tree search to validate every allocation block, it is
 * very expensive and should ONLY be run in debug mode, and even then, infrequently.
 *
 * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
 */
void
hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){

	u_int32_t current;
	extent_node_t* node1;

	hfs_checktreelinks (hfsmp);

	for (current = start; current < end; current++) {
		node1 = NULL;
		int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
		int bitmap = hfs_isallocated(hfsmp, current, 1);

		if (bitmap != rbtree){
			panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n",
				current, bitmap, rbtree);
		}
	}
}
/*
 * Exhaustive Red-Black Tree Linked List verification routine.
 *
 * This function iterates through the red-black tree's nodes, and then verifies that the linked list
 * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
 * The bitmap lock must be held while this function is run.
 */
void
hfs_checktreelinks (struct hfsmount *hfsmp) {
	extent_tree_offset_t *tree = &hfsmp->offset_tree;

	extent_node_t *current = NULL;
	extent_node_t *next = NULL;
	extent_node_t *treenext;

	current = extent_tree_off_first (tree);

	while (current) {
		next = current->offset_next;
		treenext = extent_tree_off_next (tree, current);
		if (next != treenext) {
			panic("hfs_checktreelinks: mismatch for node (%p), next: %p , treenext %p !\n", current, next, treenext);
		}
		current = treenext;
	}
}
#endif
#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Test to see if any free blocks exist at a given offset.
 * If there exists a node at the specified offset, it will return the appropriate
 * node.
 *
 * NULL indicates allocated blocks exist at that offset.
 *
 * Allocation file lock must be held.
 *
 * Returns:
 *	1 if blocks in the range are allocated.
 *	0 if all blocks in the range are free.
 */
static int
hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, extent_node_t **ret_node) {

	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t *nextnode = NULL;

	/*
	 * With only one tree, then we just have to validate that there are entries
	 * in the R/B tree at the specified offset if it really is free.
	 */
	search_sentinel.offset = startBlock;
	search_sentinel.length = numBlocks;

	node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
	if (node) {
		*ret_node = node;
		nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
		if (nextnode != node->offset_next) {
			panic ("hfs_rbtree_isallocated: Next pointers out of sync!\n");
		}

		/*
		 * Check to see if it is a superset of our target range.  Because we started
		 * with the offset or some offset prior to it, then we know the node's offset is
		 * at least <= startBlock.  So, if the end of the node is greater than the end of
		 * our target range, then the whole range is free.
		 */
		if ((node->offset + node->length) >= (startBlock + numBlocks)) {
			if (node->offset > startBlock) {
				panic ("hfs_rbtree_isallocated: bad node ordering!");
			}
			return 0;
		}
	}

	/*
	 * We got here if either our node search resulted in a node whose extent
	 * was strictly before our target offset, or we couldn't find a previous node
	 * at all (the beginning of the volume).  If the former, then we can infer that
	 * at least one block in the target range is allocated since the next node's offset
	 * must be greater than startBlock.
	 *
	 * Either way, this means that the target node is unavailable to allocate, so
	 * just return 1.
	 */
	return 1;
}
#endif
/*
 * Count number of bits set in the given 32-bit unsigned number
 *
 * Returns:
 *	Number of bits set
 */
static int num_bits_set(u_int32_t num)
{
	int count;

	for (count = 0; num; count++) {
		num &= num - 1;
	}

	return count;
}
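/*
 * Illustrative trace of num_bits_set (not from the original source): the
 * loop clears the lowest set bit on each pass via num &= (num - 1), so for
 * num = 0xB (binary 1011) it steps 1011 -> 1010 -> 1000 -> 0000, the body
 * runs three times, and the function returns 3.
 */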
/*
 * For a given range of blocks, find the total number of blocks
 * allocated.  If 'stop_on_first' is true, it stops as soon as it
 * encounters the first allocated block.  This option is useful
 * to determine if any block is allocated or not.
 *
 * Inputs:
 * 	startingBlock	First allocation block number of the range to be scanned.
 * 	numBlocks	Total number of blocks that need to be scanned.
 * 	stop_on_first	Stop the search after the first allocated block is found.
 *
 * Output:
 * 	allocCount	Total number of allocation blocks allocated in the given range.
 *
 * 			On error, it is the number of allocated blocks found
 * 			before the function got an error.
 *
 * 			If 'stop_on_first' is set,
 * 				allocCount = 1 if any allocated block was found.
 * 				allocCount = 0 if no allocated block was found.
 *
 * Returns:
 * 	0 on success, non-zero on failure.
 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t	*currentWord;	// Pointer to current word within bitmap block
	u_int32_t	wordsLeft;		// Number of words left in this bitmap block
	u_int32_t	bitMask;		// Word with given bits already set (ready to test)
	u_int32_t	firstBit;		// Bit index within word of first bit to allocate
	u_int32_t	numBits;		// Number of bits in word to allocate
	u_int32_t	*buffer = NULL;
	uintptr_t	blockRef;
	u_int32_t	bitsPerBlock;
	u_int32_t	wordsPerBlock;
	u_int32_t	blockCount = 0;
	int			error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;
		numBits = kBitsPerWord - firstBit;
		if (numBits > numBlocks) {
			numBits = numBlocks;
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}
	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}

Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}
/*
 * Count total number of blocks that are allocated in the given
 * range from the bitmap.  This is used to preflight total blocks
 * that need to be relocated during volume resize.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns:
 * 	0 on success, non-zero on failure.
 * 	On failure, allocCount is zero.
 */
int
hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, u_int32_t *allocCount)
{
	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
}
/*
 * Test to see if any blocks in a range are allocated.
 *
 * Note:  On error, this function returns 1, which means that
 * one or more blocks in the range are allocated.  This function
 * is primarily used for volume resize and we do not want
 * to report to the caller that the blocks are free when we
 * were not able to deterministically find it out.  So on error,
 * we always report that the blocks are allocated.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns
 *	0 if all blocks in the range are free.
 *	1 if blocks in the range are allocated, or there was an error.
 */
int
hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	int error;
	u_int32_t allocCount;

	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
	if (error) {
		/* On error, we always say that the blocks are allocated
		 * so that volume resize does not return false success.
		 */
		return 1;
	} else {
		/* The function was deterministically able to find out
		 * if there was any block allocated or not.  In that case,
		 * the value in allocCount is good enough to be returned
		 * back to the caller.
		 */
		return allocCount;
	}
}
/*
 * Check to see if the red-black tree is live.  Allocation file lock must be held
 * shared or exclusive to call this function.  Note that we may call this even if
 * HFS is built without activating the red-black tree code.
 */
int
hfs_isrbtree_active(struct hfsmount *hfsmp){

	//TODO: Update this function to deal with a truncate/resize coming in when the tree
	//isn't fully finished.  maybe we need to check the flags for something other than ENABLED?

#if CONFIG_HFS_ALLOC_RBTREE
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
	}
	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
		return 1;
	}
#else
	#pragma unused (hfsmp)
#endif
	/* If the RB Tree code is not enabled, then just always return 0 */
	return 0;
}
3804 * This function scans the specified bitmap block and acts on it as necessary.
3805 * We may add it to the list of blocks to be UNMAP/TRIM'd or add it to allocator
3806 * data structures. This function is not #if'd to the CONFIG_RB case because
3807 * we want to use it unilaterally at mount time if on a UNMAP-capable device.
3809 * Additionally, we may want an allocating thread to invoke this if the tree
3810 * does not have enough extents to satisfy an allocation request.
3812 * startbit - the allocation block represented by a bit in 'allocblock' where we need to
3813 * start our scan. For instance, we may need to start the normal allocation scan
3814 * in the middle of an existing allocation block.
3815 * endBit - the allocation block where we should end this search (inclusive).
3816 * bitToScan - output argument for this function to specify the next bit to scan.
3820 * nonzero on failure.
3823 static int hfs_alloc_scan_block(struct hfsmount
*hfsmp
, u_int32_t startbit
,
3824 u_int32_t endBit
, u_int32_t
*bitToScan
,
3825 struct jnl_trim_list
*list
) {
3828 u_int32_t curAllocBlock
;
3829 struct buf
*blockRef
= NULL
;
3830 u_int32_t
*buffer
= NULL
;
3831 u_int32_t wordIndexInBlock
;
3832 u_int32_t blockSize
= (u_int32_t
)hfsmp
->vcbVBMIOSize
;
3833 u_int32_t wordsPerBlock
= blockSize
/ kBytesPerWord
;
3834 u_int32_t offset
= 0;
3838 * Read the appropriate block from the bitmap file. ReadBitmapBlock
3839 * figures out which actual on-disk block corresponds to the bit we're
3842 error
= ReadBitmapBlock(hfsmp
, startbit
, &buffer
, (uintptr_t*)&blockRef
);
	/* curAllocBlock represents the logical block we're analyzing. */
	curAllocBlock = startbit;

	/* Figure out which word curAllocBlock corresponds to in the block we read */
	wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;

	/* Scan a word at a time */
	while (wordIndexInBlock < wordsPerBlock) {
		u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
		u_int32_t curBit;

		/* modulate curBit because it may start in the middle of a word */
		for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {

			u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
			if (ALLOC_DEBUG) {
				u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer);
				if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) {
					panic("hfs_alloc_scan: curAllocBit %u, curBit (%d), word (0x%x), is_allocated (0x%x) res(0x%x) \n",
					      curAllocBlock, curBit, currentWord, is_allocated, res);
				}
			}

			/*
			 * If curBit is not allocated, keep track of the start of the free range.
			 * Increment a running tally on how many free blocks in a row we've seen.
			 */
			if (!is_allocated) {
				if (size == 0) {
					offset = curAllocBlock;
				}
				size++;
			}
			else {
				/*
				 * If we hit an allocated block, insert the extent that tracked the range
				 * we saw, and reset our tally counter.
				 */
				if (size != 0) {
#if CONFIG_HFS_ALLOC_RBTREE
					extent_tree_free_space(&hfsmp->offset_tree, size, offset);
#else
					hfs_track_unmap_blocks (hfsmp, offset, size, list);
#endif
					size = 0;
					offset = 0;
				}
			}
			curAllocBlock++;
			/*
			 * Exit early if the next bit we'd analyze would take us beyond the end of the
			 * range that we're supposed to scan.
			 */
			if (curAllocBlock >= endBit) {
				goto DoneScanning;
			}
		}
		wordIndexInBlock++;
	}
DoneScanning:

	/* We may have been tracking a range of free blocks that hasn't been inserted yet. */
	if (size != 0) {
#if CONFIG_HFS_ALLOC_RBTREE
		extent_tree_free_space(&hfsmp->offset_tree, size, offset);
#else
		hfs_track_unmap_blocks (hfsmp, offset, size, list);
#endif
	}

	/*
	 * curAllocBlock represents the next block we need to scan while we're in this
	 * function.
	 */
	*bitToScan = curAllocBlock;

	ReleaseScanBitmapBlock(blockRef);

	return 0;
}
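
/*
 * Worked example (illustrative only) of the bit indexing above, assuming the
 * usual constants kBitsPerWord = 32 and kBitsWithinWordMask = 31: for
 * curAllocBlock = 70, curBit = 70 % 32 = 6, and the test mask is
 * 1 << (31 - 6) = 0x02000000.  Because the word was byte-swapped with
 * SWAP_BE32, this probes the 7th most significant bit -- HFS bitmaps are
 * big-endian, with the lowest-numbered block at the most significant bit.
 */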
/*
 * This function is basically the same as hfs_isallocated, except it's designed for
 * use with the red-black tree validation code.  It assumes we're only checking whether
 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
 *
 * This should not be called in general purpose scanning code.
 */
int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {

	u_int32_t  *currentWord;	// Pointer to current word within bitmap block
	u_int32_t  bitMask;		// Word with given bits already set (ready to test)
	u_int32_t  firstBit;		// Bit index within word of first bit to allocate
	u_int32_t  numBits;		// Number of bits in word to allocate
	u_int32_t  bitsPerBlock;
	uintptr_t  blockRef = 0;
	u_int32_t  wordsPerBlock;
	u_int32_t  numBlocks = 1;
	u_int32_t  *buffer = NULL;

	int  inuse = 0;
	int  error;

	if (bp_buf) {
		/* just use passed-in buffer if avail. */
		buffer = bp_buf;
	}
	else {
		/*
		 * Pre-read the bitmap block containing the first word of allocation
		 */
		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
		if (error)
			return (error);
	}

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	bitMask = kAllBitsSetInWord >> firstBit;
	numBits = kBitsPerWord - firstBit;
	if (numBits > numBlocks) {
		numBits = numBlocks;
		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
	}
	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
		inuse = 1;
		goto Exit;
	}
	numBlocks -= numBits;
	++currentWord;

Exit:
	if (bp_buf == NULL) {
		if (buffer) {
			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
		}
	}
	return (inuse);
}
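
/*
 * Worked example (illustrative only) of the non-word-aligned masking above,
 * assuming vcbVBMIOSize = 4096 (so bitsPerBlock = 32768, wordsPerBlock = 1024)
 * and startingBlock = 40005: wordIndexInBlock = (40005 & 32767) / 32 = 226,
 * firstBit = 40005 % 32 = 5, bitMask = 0xFFFFFFFF >> 5 = 0x07FFFFFF, and since
 * numBits (27) exceeds numBlocks (1), the mask is trimmed to
 * 0x07FFFFFF & ~(0xFFFFFFFF >> 6) = 0x04000000 -- exactly one bit, the one
 * representing block 40005.
 */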
#if CONFIG_HFS_ALLOC_RBTREE

/*
 * Extern function that is called from mount and upgrade mount routines
 * that enable us to initialize the tree.
 */
u_int32_t
InitTree(struct hfsmount *hfsmp) {
	extent_tree_init (&(hfsmp->offset_tree));
	return 0;
}
/*
 * This function builds the trees specified in its arguments. It uses
 * buf_meta_breads to scan through the bitmap and re-build the tree state.
 * It is very important to use buf_meta_bread because we need to ensure that we
 * read the most current version of the blocks that we're scanning.  If we used
 * cluster_io, then journaled transactions could still be sitting in RAM since they are
 * written to disk in the proper location asynchronously.
 *
 * Because this could be still running when mount has finished, we need to check
 * after every allocation block that we're working on if an unmount or some other
 * operation that would cause us to teardown has come in. (think downgrade mount).
 * If an unmount has come in, then abort whatever we're doing and return -1
 * to indicate we hit an error.  If we don't do this, we'd hold up unmount for
 * a very long time.
 *
 * This function assumes that the bitmap lock is acquired exclusively before being
 * called.  It will drop the lock and then re-acquire it during operation, but
 * will always return with the lock held.
 */
u_int32_t
GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {

	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);

	u_int32_t *cur_block_eof;
	int error = 0;

	int USE_FINE_GRAINED_LOCKING = 0;

	/* Initialize the block counter while we hold the bitmap lock */
	cur_block_eof = &hfsmp->offset_block_end;

	/*
	 * This loop advances over all allocation bitmap blocks of the current region
	 * to scan them and add the results into the red-black tree.  We use the mount point
	 * variable offset_block_end as our loop counter.  This gives us flexibility
	 * because we can release the allocation bitmap lock and allow a thread that wants
	 * to make an allocation to grab the lock and do some scanning on our behalf while we're
	 * waiting to re-acquire the lock.  Then, the allocating thread will only do as much bitmap
	 * scanning as needed to fulfill its allocation.
	 *
	 * If the other thread does IO for us, then it will update the offset_block_end
	 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
	 * scanning.  So when we re-grab the lock, our current EOF/loop will immediately skip us to the next
	 * block that needs scanning.
	 */

	while (*cur_block_eof < endBlock) {

		/*
		 * If the filesystem is being resized before the bitmap has been fully scanned, we'll
		 * update our endBlock to match the current allocation limit in the hfsmp struct.
		 * The allocLimit field would only be updated while holding the bitmap lock, so we won't
		 * be executing this code at the same time that the resize is going on.
		 */
		if ((initialscan) && (endBlock != hfsmp->allocLimit)) {

			/* If we're past the new/modified allocLimit, then just stop immediately.*/
			if (*cur_block_eof >= hfsmp->allocLimit) {
				break;
			}
			endBlock = hfsmp->allocLimit;
		}

		/*
		 * TODO: fix unmount stuff!
		 * See rdar://7391404
		 *
		 * Once the RB allocator is checked in, we'll want to augment it to not hold the
		 * allocation bitmap lock for the entire duration of the tree scan.  For a first check-in
		 * it's ok to do that but we can't leave it like that forever.
		 *
		 * The gist of the new algorithm will work as follows:
		 * if an unmount is in flight and has been detected:
		 *		abort tree-build.
		 *		unset tree-in-progress bit.
		 *		wakeup unmount thread
		 *		unlock allocation bitmap lock, fail out.
		 *
		 * The corresponding code in the unmount side should already be in place.
		 */

		error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof, NULL);
		if (error) {
			break;
		}

		//TODO: Fix this below!
		if (USE_FINE_GRAINED_LOCKING){
			hfs_systemfile_unlock(hfsmp, *flags);
			*flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		}
		//TODO: Infer that if *flags == 0, we don't actually need to lock/unlock.
	}

	return error;
}
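
/*
 * Illustrative walkthrough (not from the original source): with
 * offset_block_end = 0 and endBlock = 1000, the loop above calls
 * hfs_alloc_scan_block once per bitmap block; each call advances
 * *cur_block_eof (i.e. hfsmp->offset_block_end) to the first unscanned
 * block.  So if an allocating thread scans blocks 0-511 on our behalf
 * while we wait to re-acquire the lock, our next iteration resumes at
 * block 512 rather than 0.
 */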
/*
 * This function destroys the specified rb-trees associated with the mount point.
 */
void DestroyTrees(struct hfsmount *hfsmp) {

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
		printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
		hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
	}

	/*
	 * extent_tree_destroy will start with the first entry in the tree (by offset), then
	 * iterate through the tree quickly using its embedded linked list.  This results in tree
	 * destruction in O(n) time.
	 */

	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
		extent_tree_destroy(&hfsmp->offset_tree);

		/* Mark Trees as disabled */
		hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;
	}

	return;
}

#endif	/* CONFIG_HFS_ALLOC_RBTREE */
/*
 * This function resets all of the data structures relevant to the
 * free extent cache stored in the hfsmount struct.
 *
 * If we are using the red-black tree code then we need to account for the fact that
 * we may encounter situations where we need to jettison the tree.  If that is the
 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
 * the free ext cache is zeroed before we start using it.
 *
 * We also reset and disable the cache when allocLimit is updated... which
 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
 * It is independent of the type of allocator being used currently.
 */
void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
{
	int bytes;
	void *freeExt;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* reset Free Extent Count */
	hfsmp->vcbFreeExtCnt = 0;

	/* reset the actual array */
	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
	freeExt = (void*)(hfsmp->vcbFreeExt);

	bzero (freeExt, bytes);

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return;
}
/*
 * This function is used to inform the allocator if we have to effectively shrink
 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
 *
 * The bitmap lock must be held when calling this function.  This function also modifies the
 * allocLimit field in the hfs mount point structure in the general case.
 *
 * In the shrinking case, we'll have to remove all free extents from the red-black
 * tree past the specified offset new_end_block.  In the growth case, we'll have to force
 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
 *
 * new_end_block represents the total number of blocks available for allocation in the resized
 * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
 * will be out of the (0, n-1) range that are indexable in the bitmap.
 *
 * Returns	0 on success
 */
u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {

	/*
	 * Update allocLimit to the argument specified, but don't do anything else
	 * if the red/black tree is not enabled.
	 */
	hfsmp->allocLimit = new_end_block;

	/* Invalidate the free extent cache completely so that
	 * it does not have any extents beyond end of current
	 * volume.
	 */
	ResetVCBFreeExtCache(hfsmp);

#if CONFIG_HFS_ALLOC_RBTREE
	/* Shrinking the existing filesystem */
	if ((new_end_block < hfsmp->offset_block_end) &&
	    (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		extent_node_t search_sentinel;
		extent_node_t *node = NULL;
		/* Remover points to the current item to free/remove from the tree */
		extent_node_t *remover = NULL;

		/* Begin search at the specified offset */
		memset (&search_sentinel, 0, sizeof(extent_node_t));
		search_sentinel.offset = new_end_block;

		/*
		 * Find the first available extent that satisfies the allocation by searching
		 * from the starting point or 1 earlier.  We may need to split apart an existing node
		 * if it straddles the new alloc limit.
		 */
		node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
		if (node) {
			/* If it's an exact match, then just remove them all from this point forward */
			if (node->offset == new_end_block) {
				/*
				 * Find the previous entry and update its next pointer to NULL
				 * since this entry is biting the dust.  Update remover to node.
				 */
				extent_node_t *prev = NULL;
				prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
				if (prev) {
					prev->offset_next = NULL;
				}
				remover = node;
			}
			else {
				/* See if we need to split this node */
				if ((node->offset + node->length) > new_end_block) {
					/*
					 * Update node to reflect its new size up until new_end_block.
					 */
					remover = node->offset_next;
					node->length = new_end_block - node->offset;
					/* node is becoming the last free extent in the volume. */
					node->offset_next = NULL;
				}
				else {
					if (node->offset_next == NULL) {
						/*
						 * 'node' points to the last free extent in the volume.
						 * Coincidentally, it is also before the new cut-off point at which
						 * we will stop representing bitmap values in the tree.  Just bail out now.
						 */
						return 0;
					}
					/*
					 * Otherwise, point our temp variable 'remover' to the node where
					 * we'll need to start yanking things out of the tree, and make 'node'
					 * the last element in the tree in the linked list.
					 */
					remover = node->offset_next;
					if (remover->offset <= new_end_block) {
						panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
					}
					node->offset_next = NULL;
				}
			}

			/*
			 * Remover is our "temp" pointer that points to the current node to remove from
			 * the offset tree.  We'll simply iterate through the tree linked list, removing the current
			 * element from the tree, freeing them as we come across them.
			 */
			while (remover) {
				extent_node_t *next = remover->offset_next;
				extent_tree_remove_node (&hfsmp->offset_tree, remover);
				free_node (remover);
				remover = next;
			}

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block-1);
			}

			/*
			 * Don't forget to shrink offset_block_end after a successful truncation
			 * new_end_block should represent the number of blocks available on the
			 * filesystem.
			 */
			hfsmp->offset_block_end = new_end_block;

			return 0;
		}
		else {
			if (ALLOC_DEBUG) {
				panic ("UpdateAllocLimit: no prev!");
			}
			return ENOSPC;
		}
	}
	/* Growing the existing filesystem */
	else if ((new_end_block > hfsmp->offset_block_end) &&
		 (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		int flags = 0;
		int retval = 0;

		if (ALLOC_DEBUG) {
			printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
			hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
		}

		retval = GenerateTree (hfsmp, new_end_block, &flags, 0);

		/*
		 * Don't forget to update offset_block_end after a successful tree extension.
		 */
		if (retval == 0) {

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after growth\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block);
			}

			hfsmp->offset_block_end = new_end_block;
		}

		return retval;
	}
	/* Otherwise, do nothing. fall through to the code below. */
	printf ("error : off_block_end: %d, alloclimit: %d, new_end_block: %d\n",
		hfsmp->offset_block_end, hfsmp->allocLimit, new_end_block);
#endif

	return 0;
}
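
/*
 * Shrink example (illustrative only): if the tree holds a free extent node
 * at offset 900 with length 200 and the volume is truncated to
 * new_end_block = 1000, the node straddles the new limit, so its length is
 * clipped to 100 (covering blocks 900-999) and every node at or beyond
 * offset 1000 is unlinked and freed by the remover loop above.
 */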
/*
 * Remove an extent from the list of free extents.
 *
 * This is a low-level routine.  It does not handle overlaps or splitting;
 * that is the responsibility of the caller.  The extent at the given index
 * will be removed, and any following extents in the list will be shifted up.
 *
 * Inputs:
 *	hfsmp - mount point structure
 *	index - index of the extent to remove
 */
static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
{
	if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
		if (ALLOC_DEBUG)
			panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
		else
			printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
		return;
	}
	int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
	if (shift_count > 0) {
		memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
	}
	hfsmp->vcbFreeExtCnt--;
}
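
/*
 * Example (illustrative only): with 4 cached extents and index = 1,
 * shift_count = 4 - 1 - 1 = 2, so the entries at slots 2 and 3 move up to
 * slots 1 and 2, and vcbFreeExtCnt drops to 3.
 */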
/*
 * Add an extent to the list of free extents.
 *
 * This is a low-level routine.  It does not handle overlaps or coalescing;
 * that is the responsibility of the caller.  This routine *does* make
 * sure that the extent it is adding is inserted in the correct location.
 * If the list is full, this routine will handle either removing the last
 * extent in the list to make room for the new extent, or ignoring the
 * new extent if it is "worse" than the last extent in the list.
 *
 * Inputs:
 *	startBlock - Start of extent to add
 *	blockCount - Number of blocks in extent to add
 *
 * Result:
 *	The index where the extent was inserted, or kMaxFreeExtents
 *	if the extent was not inserted (the list was full, and the extent
 *	being added was "worse" than everything in the list).
 */
static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	uint32_t i;

	/* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
	if (ALLOC_DEBUG) {
		uint32_t endBlock = startBlock + blockCount;
		for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
			if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
			    startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
				continue;
			}
			panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
			      hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
		}
	}

	/* Figure out what index the new extent should be inserted at. */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* The list is sorted by increasing offset. */
			if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
				break;
			}
		} else {
			/* The list is sorted by decreasing size. */
			if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
				break;
			}
		}
	}

	/* When we get here, i is the index where the extent should be inserted. */
	if (i == kMaxFreeExtents) {
		/*
		 * The new extent is worse than anything already in the list,
		 * and the list is full, so just ignore the extent to be added.
		 */
		return i;
	}

	/*
	 * Grow the list (if possible) to make room for an insert.
	 */
	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
		hfsmp->vcbFreeExtCnt++;

	/*
	 * If we'll be keeping any extents after the insert position, then shift them.
	 */
	int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
	if (shift_count > 0) {
		memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
	}

	/* Finally, store the new extent at its correct position. */
	hfsmp->vcbFreeExt[i].startBlock = startBlock;
	hfsmp->vcbFreeExt[i].blockCount = blockCount;
	return i;
}
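
/*
 * Insertion example (illustrative only), non-sparse case where the list is
 * sorted by decreasing size: with a cache of (100,50), (300,20), (700,5),
 * adding the extent (900,30) stops the index scan at i = 1 (since 30 > 20),
 * shifts (300,20) and (700,5) down one slot, and stores (900,30) at slot 1.
 */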
/*
 * Remove an entry from free extent cache after it has been allocated.
 *
 * This is a high-level routine.  It handles removing a portion of a
 * cached extent, potentially splitting it into two (if the cache was
 * already full, throwing away the extent that would sort last).  It
 * also handles removing an extent that overlaps multiple extents in
 * the cache.
 *
 * Inputs:
 *	hfsmp - mount point structure
 *	startBlock - starting block of the extent to be removed.
 *	blockCount - number of blocks of the extent to be removed.
 */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	u_int32_t i, insertedIndex;
	u_int32_t currentStart, currentEnd, endBlock;
	int extentsRemoved = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	/* If red-black tree is enabled, no free extent cache is necessary */
	if (hfs_isrbtree_active(hfsmp) == true) {
		return;
	}
#endif

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	endBlock = startBlock + blockCount;

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Iterate over all of the extents in the free extent cache, removing or
	 * updating any entries that overlap with the input extent.
	 */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentStart = hfsmp->vcbFreeExt[i].startBlock;
		currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;

		/*
		 * If the current extent is entirely before or entirely after the
		 * extent to be removed, then we keep it as-is.
		 */
		if (currentEnd <= startBlock || currentStart >= endBlock) {
			continue;
		}

		/*
		 * If the extent being removed entirely contains the current extent,
		 * then remove the current extent.
		 */
		if (startBlock <= currentStart && endBlock >= currentEnd) {
			remove_free_extent_list(hfsmp, i);

			/*
			 * We just removed the extent at index i.  The extent at
			 * index i+1 just got shifted to index i.  So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
			++extentsRemoved;
			continue;
		}

		/*
		 * If the extent being removed is strictly "in the middle" of the
		 * current extent, then we need to split the current extent into
		 * two discontiguous extents (the "head" and "tail").  The good
		 * news is that we don't need to examine any other extents in
		 * the list.
		 */
		if (startBlock > currentStart && endBlock < currentEnd) {
			remove_free_extent_list(hfsmp, i);
			add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
			add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
			break;
		}

		/*
		 * The only remaining possibility is that the extent to be removed
		 * overlaps the start or end (but not both!) of the current extent.
		 * So we need to replace the current extent with a shorter one.
		 *
		 * The only tricky part is that the updated extent might be at a
		 * different index than the original extent.  If the updated extent
		 * was inserted after the current extent, then we need to re-examine
		 * the entry at index i, since it now contains the extent that was
		 * previously at index i+1.  If the updated extent was inserted
		 * before or at the same index as the removed extent, then the
		 * following extents haven't changed position.
		 */
		remove_free_extent_list(hfsmp, i);
		if (startBlock > currentStart) {
			/* Remove the tail of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
		} else {
			/* Remove the head of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
		}
		if (insertedIndex > i) {
			--i;	/* Undo the "++i" in the loop, so we examine the entry at index i again. */
		}
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);

	return;
}
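
/*
 * Split example (illustrative only): if the cache holds the free extent
 * (100,100) (blocks 100-199) and blocks 130-149 are allocated, the call
 * remove_free_extent_cache(hfsmp, 130, 20) removes (100,100) and re-inserts
 * the head (100,30) and the tail (150,50) as two separate cache entries.
 */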
/*
 * Add an entry to free extent cache after it has been deallocated.
 *
 * This is a high-level routine.  It will merge overlapping or contiguous
 * extents into a single, larger extent.
 *
 * If the extent provided has blocks beyond current allocLimit, it is
 * clipped to allocLimit (so that we won't accidentally find and allocate
 * space beyond allocLimit).
 *
 * Inputs:
 *	hfsmp - mount point structure
 *	startBlock - starting block of the extent to be added.
 *	blockCount - number of blocks of the extent to be added.
 *
 * Returns:
 *	true  - if the extent was added successfully to the list
 *	false - if the extent was not added to the list, maybe because
 *		the extent was beyond allocLimit, or is not best
 *		candidate to be put in the cache.
 */
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	Boolean retval = false;
	uint32_t endBlock;
	uint32_t currentEnd;
	uint32_t i;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	/*
	 * If using the red-black tree allocator, then there's no need to special case
	 * for the sparse device case.  We'll simply add the region we've recently freed
	 * to the red-black tree, where it will get sorted by offset and length.  The only special
	 * casing will need to be done on the allocation side, where we may favor free extents
	 * based on offset even if it will cause fragmentation.  This may be true, for example, if
	 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp) == true) {
		goto out_not_locked;
	}
#endif

	/* No need to add extent that is beyond current allocLimit */
	if (startBlock >= hfsmp->allocLimit) {
		goto out_not_locked;
	}

	/* If end of the free extent is beyond current allocLimit, clip the extent */
	if ((startBlock + blockCount) > hfsmp->allocLimit) {
		blockCount = hfsmp->allocLimit - startBlock;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Make a pass through the free extent cache, looking for known extents that
	 * overlap or are contiguous with the extent to be added.  We'll remove those
	 * extents from the cache, and incorporate them into the new extent to be added.
	 */
	endBlock = startBlock + blockCount;
	for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
		if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
			/* Extent i does not overlap and is not contiguous, so keep it. */
			continue;
		} else {
			/* We need to remove extent i and combine it with the input extent. */
			if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
				startBlock = hfsmp->vcbFreeExt[i].startBlock;
			if (currentEnd > endBlock)
				endBlock = currentEnd;

			remove_free_extent_list(hfsmp, i);
			/*
			 * We just removed the extent at index i.  The extent at
			 * index i+1 just got shifted to index i.  So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
		}
	}
	/* Consider the add successful only if the extent actually landed in the cache. */
	retval = (add_free_extent_list(hfsmp, startBlock, endBlock - startBlock) < kMaxFreeExtents);

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

out_not_locked:
	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);

	return retval;
}
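
/*
 * Coalescing example (illustrative only): if the cache holds (100,50) and
 * (200,25), freeing blocks 150-199 via add_free_extent_cache(hfsmp, 150, 50)
 * absorbs both neighbors -- the loop widens the input to startBlock = 100,
 * endBlock = 225 and removes the two old entries -- and a single extent
 * (100,125) is inserted in their place.
 */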
/* Debug function to check if the free extent cache is good or not */
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
{
	u_int32_t i, j;

	/* Do not do anything if debug is not on, or if we're using the red-black tree */
	if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
		return;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
		panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);

	/*
	 * Iterate the Free extent cache and ensure no entries are bogus or refer to
	 * allocated blocks.
	 */
	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
		u_int32_t start, nblocks;

		start   = hfsmp->vcbFreeExt[i].startBlock;
		nblocks = hfsmp->vcbFreeExt[i].blockCount;

		//printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);

		/* Check if any of the blocks in free extent cache are allocated.
		 * This should not be enabled always because it might take
		 * very long for large extents that get added to the list.
		 *
		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
		 * because it is going to do I/O.  Note that the free extent
		 * cache could change.  That's a risk we take when using this
		 * debugging code.  (Another alternative would be to try to
		 * detect when the free extent cache changed, and perhaps
		 * restart if the list changed while we dropped the lock.)
		 */
		if (check_allocated) {
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (hfs_isallocated(hfsmp, start, nblocks)) {
				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
				      hfsmp, i, start, nblocks);
			}
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
		}

		/* Check if any part of the extent is beyond allocLimit */
		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
			       hfsmp, i, start, nblocks, hfsmp->allocLimit);
		}

		/* Check if there are any duplicate start blocks */
		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
			if (start == hfsmp->vcbFreeExt[j].startBlock) {
				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
				      hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
				      hfsmp->vcbFreeExt[j].blockCount);
			}
		}

		/* Check if the entries are out of order */
		if ((i+1) != hfsmp->vcbFreeExtCnt) {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				/* sparse devices are sorted by starting block number (ascending) */
				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
					       hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
					       hfsmp->vcbFreeExt[i+1].blockCount);
				}
			} else {
				/* normally sorted by block count (descending) */
				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
					       hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
					       hfsmp->vcbFreeExt[i+1].blockCount);
				}
			}
		}
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}