/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
	File:		VolumeAllocation.c

	Contains:	Routines for accessing and modifying the volume bitmap.

	Version:	HFS Plus 1.0

	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.

*/
/*
Public routines:
	BlockAllocate
					Allocate space on a volume.  Can allocate space contiguously.
					If not contiguous, then allocation may be less than what was
					asked for.  Returns the starting block number, and number of
					blocks.  (Will only do a single extent???)
	BlockDeallocate
					Deallocate a contiguous run of allocation blocks.

	BlockMarkAllocated
					Exported wrapper to mark blocks as in-use.  This will correctly determine
					whether or not the red-black tree is enabled and call the appropriate function
					if applicable.
	BlockMarkFree
					Exported wrapper to mark blocks as freed.  This will correctly determine whether or
					not the red-black tree is enabled and call the appropriate function if applicable.


	ResetVCBFreeExtCache
					Since the red-black tree obviates the need to maintain the free extent cache, we do
					not update it if the tree is also live.  As a result, if we ever need to destroy the trees
					we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
					bitmap scanning allocator.
					We also reset and disable the free extent cache when volume resizing is
					in flight.

	UpdateAllocLimit
					Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
					allocations from occupying space in the region we are modifying during a filesystem resize.
					At other times, it should be consistent with the total number of allocation blocks in the
					filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
					know about.  If growing, scan the new range of bitmap, and if shrinking, reduce the
					number of items in the tree that we can allocate from.

	UnmapBlocks
					Issues DKIOCUNMAPs to the device as it fills the internal volume buffer when iterating
					the volume bitmap.

Internal routines:
	Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.  This
	is to cut down on space for functions that could not possibly be used if the red-black
	tree code is not being used.

	BlockMarkFreeRBTree
					Make an internal call to BlockMarkFree and then update
					and/or create Red-Black Tree allocation tree nodes to correspond
					to the free space being generated.
	BlockMarkFreeInternal
					Mark a contiguous range of blocks as free.  The corresponding
					bits in the volume bitmap will be cleared.  This will actually do the work
					of modifying the bitmap for us.

	BlockMarkAllocatedRBTree
					Make an internal call to BlockAllocateMarked, which will update the
					bitmap on-disk when we allocate blocks.  If that is successful, then
					we'll remove the appropriate entries from the red-black tree.
	BlockMarkAllocatedInternal
					Mark a contiguous range of blocks as allocated.  The corresponding
					bits in the volume bitmap are set.  Also tests to see
					if any of the blocks were previously unallocated.
	BlockFindContiguous
					Find a contiguous range of blocks of a given size.  The caller
					specifies where to begin the search (by block number).  The
					block number of the first block in the range is returned.  This is only
					called by the bitmap scanning logic as the red-black tree should be able
					to do this internally by searching its tree.
	BlockAllocateAny
					Find and allocate a contiguous range of blocks up to a given size.  The
					first range of contiguous free blocks found is allocated, even if there
					are fewer blocks than requested (and even if a contiguous range of blocks
					of the given size exists elsewhere).
	BlockAllocateAnyBitmap
					Finds a range of blocks per the above requirements without using the
					Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
					any valid range of free space needed.
	BlockAllocateAnyRBTree
					Finds a valid range of blocks per the above requirements by searching
					the red-black tree.  We can just make an internal call to
					BlockAllocateContigRBTree to find the valid range.
	BlockAllocateContig
					Find and allocate a contiguous range of blocks of a given size.  If
					a contiguous range of free blocks of the given size isn't found, then
					the allocation fails (i.e. it is "all or nothing").  This routine is
					essentially a wrapper function around its related sub-functions,
					BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
					respectively, the original HFS+ bitmap scanning logic and the new
					Red-Black Tree to search and manage free-space decisions.  This function
					contains logic for when to use which of the allocation algorithms,
					depending on the free space contained in the volume.
	BlockAllocateContigBitmap
					Finds and allocates a range of blocks specified by the size parameters
					using the original HFS+ bitmap scanning logic.  The red-black tree
					will not be updated if this function is used.
	BlockAllocateContigRBTree
					Finds and allocates a range of blocks specified by the size parameters
					using the new red/black tree data structure and search algorithms
					provided by the tree library.  Updates the red/black tree nodes after
					the on-disk data structure (bitmap) has been updated.
	BlockAllocateKnown
					Try to allocate space from known free space in the volume's
					free extent cache.

	ReadBitmapBlock
					Given an allocation block number, read the bitmap block that
					contains that allocation block into a caller-supplied buffer.

	ReleaseBitmapBlock
					Release a bitmap block back into the buffer cache.

	remove_free_extent_cache
					Remove an extent from the free extent cache.  Handles overlaps
					with multiple extents in the cache, and handles splitting an
					extent in the cache if the extent to be removed is in the middle
					of a cached extent.

	add_free_extent_cache
					Add an extent to the free extent cache.  It will merge the
					input extent with extents already in the cache.


Debug/Test Routines
	hfs_isallocated
					Test to see if any blocks in a range are allocated.  Journal or
					allocation file lock must be held.

	hfs_isallocated_scan
					Test to see if any blocks in a range are allocated.  Releases and
					invalidates the block used when finished.

	hfs_isrbtree_active
					Test to see if the allocation red-black tree is live.  This function
					requires either an exclusive or shared lock on the allocation bitmap file
					in the HFS mount structure, to prevent red-black tree pointers from disappearing.

	hfs_isrbtree_allocated
					Test to see if the specified extent is marked as allocated in the red-black tree.
					Multiplexes between the metadata zone trees and the normal allocation zone trees
					depending on the offset of the extent specified.

	check_rbtree_extents
					Void function that wraps around the above function (hfs_isrbtree_allocated)
					and checks to see that the return value was appropriate based on the assertion we're
					trying to validate (whether or not the specified extent should be marked as free
					or allocated).

	hfs_validate_rbtree
					Exhaustive search function that will check every allocation block for its status in the
					red-black tree and then check the corresponding status in the bitmap file.  If the two are out
					of sync, it will panic.  Note that this function is extremely expensive and must NEVER
					be run outside of debug code.

	hfs_checktreelinks
					Checks the embedded linked list structure of the red black tree for integrity.  The next pointer
					should always point to whatever extent_tree_offset_next returns.


Red Black Tree Specific Routines
	GenerateTree
					Build a red-black tree for the given filesystem's bitmap.

	DestroyTrees
					Destroy the tree on the given filesystem.


	hfs_alloc_scan_block
					Given a starting allocation block number, figures out which physical block contains that
					allocation block's bit, and scans it from the starting bit until either the ending bit or
					the end of the block.  Free space extents are inserted into the appropriate red-black tree.

*/

#include "../../hfs_macos_defs.h"

#include <sys/types.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/ubc.h>
#include <sys/uio.h>
#include <kern/kalloc.h>
/* For VM Page size */
#include <libkern/libkern.h>

#include "../../hfs.h"
#include "../../hfs_dbg.h"
#include "../../hfs_format.h"
#include "../../hfs_endian.h"
#include "../../hfs_macos_defs.h"
#include "../headers/FileMgrInternal.h"
#include "../headers/HybridAllocator.h"
#include "../../hfs_kdebug.h"

/* Headers for unmap-on-mount support */
#include <vfs/vfs_journal.h>
#include <sys/disk.h>

#ifndef CONFIG_HFS_TRIM
#define CONFIG_HFS_TRIM 0
#endif

/*
 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
 * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
 * disabled by default because there can be a lot of these events,
 * and we don't want to overwhelm the kernel debug buffer.  If you
 * want to watch these events in particular, just set the sysctl.)
 */
static int hfs_kdebug_allocation = 0;
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
enum {
	/*
	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
	 * BlockDeallocate, including the internal BlockAllocateXxx
	 * routines so we can see how an allocation was satisfied.
	 *
	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
	 * free extent cache.
	 *
	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
	 *
	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
	 * or clearing bits, scanning the bitmap).
	 */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};
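
/*
 * Usage sketch (editor's illustration, not from the original source):
 * the sysctl value is the bitwise OR of the flags above, so enabling
 * allocation and bitmap logging from user space would look like:
 *
 *	sysctl vfs.generic.hfs.kdebug.allocation=9
 *
 * (9 == HFSDBG_ALLOC_ENABLED | HFSDBG_BITMAP_ENABLED)
 */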

enum {
	kBytesPerWord			=	4,
	kBitsPerByte			=	8,
	kBitsPerWord			=	32,

	kBitsWithinWordMask		=	kBitsPerWord-1
};

#define kLowBitInWordMask	0x00000001ul
#define kHighBitInWordMask	0x80000000ul
#define kAllBitsSetInWord	0xFFFFFFFFul
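
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): how the constants above locate an allocation block's bit.
 * Block N lives in 32-bit word (N / kBitsPerWord) of the bitmap, and
 * bit 0 of the bitmap is the *high-order* bit of the first word, which
 * is why the routines in this file build masks by shifting
 * kHighBitInWordMask right.
 */
static inline u_int32_t
hfs_example_bit_mask(u_int32_t block, u_int32_t *wordIndex)
{
	*wordIndex = block / kBitsPerWord;	/* index of the bitmap word holding this block's bit */
	return kHighBitInWordMask >> (block & kBitsWithinWordMask);	/* mask for the bit within that word */
}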


#define ALLOC_DEBUG 0

static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef);

static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty);

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockMarkAllocatedInternal (
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks);

static OSErr BlockMarkFreeInternal(
	ExtendedVCB	*vcb,
	u_int32_t	startingBlock,
	u_int32_t	numBlocks,
	Boolean 	do_validate);


static OSErr ReleaseScanBitmapBlock( struct buf *bp );

static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
		u_int32_t numBlocks, struct jnl_trim_list *list);

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);

static int hfs_alloc_scan_block(struct hfsmount *hfsmp,
		u_int32_t startbit,
		u_int32_t endBit,
		u_int32_t *bitToScan,
		struct jnl_trim_list *list);

int hfs_isallocated_scan (struct hfsmount *hfsmp,
		u_int32_t startingBlock,
		u_int32_t *bp_buf);

#if CONFIG_HFS_ALLOC_RBTREE
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks,
	u_int32_t		forceContig);

static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks);

static OSErr BlockMarkFreeRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks);

static int
hfs_isrbtree_allocated (struct hfsmount * hfsmp,
		u_int32_t startBlock,
		u_int32_t numBlocks,
		extent_node_t** node1);

extern void
hfs_validate_rbtree (struct hfsmount *hfsmp,
		u_int32_t start,
		u_int32_t end);

static void hfs_checktreelinks (struct hfsmount *hfsmp);


void check_rbtree_extents (struct hfsmount *hfsmp,
		u_int32_t start,
		u_int32_t numBlocks,
		int shouldBeFree);

#define ASSERT_FREE 1
#define ASSERT_ALLOC 0

#endif /* CONFIG_HFS_ALLOC_RBTREE */

/* Functions for manipulating free extent cache */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);

#if ALLOC_DEBUG
/*
 * Validation Routine to verify that the TRIM list maintained by the journal
 * is in good shape relative to what we think the bitmap should have.  We should
 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
 * we panic.
 */
int trim_validate_bitmap (struct hfsmount *hfsmp);
int trim_validate_bitmap (struct hfsmount *hfsmp) {
	u_int64_t blockno_offset;
	u_int64_t numblocks;
	int i;
	int count;
	u_int32_t startblk;
	u_int32_t blks;
	int err = 0;
	uint32_t alloccount = 0;

	if (hfsmp->jnl) {
		struct journal *jnl = (struct journal*)hfsmp->jnl;
		if (jnl->active_tr) {
			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
			count = trim->extent_count;
			for (i = 0; i < count; i++) {
				blockno_offset = trim->extents[i].offset;
				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
				blockno_offset = blockno_offset / hfsmp->blockSize;
				numblocks = trim->extents[i].length / hfsmp->blockSize;

				startblk = (u_int32_t)blockno_offset;
				blks = (u_int32_t) numblocks;
				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);

				if (err == 0 && alloccount != 0) {
					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
				}
			}
		}
	}
	return 0;
}

#endif


/*
;________________________________________________________________________________
;
; Routine:		hfs_unmap_free_extent
;
; Function:		Make note of a range of allocation blocks that should be
;				unmapped (trimmed).  That is, the given range of blocks no
;				longer have useful content, and the device can unmap the
;				previous contents.  For example, a solid state disk may reuse
;				the underlying storage for other blocks.
;
;				This routine is only supported for journaled volumes.  The extent
;				being freed is passed to the journal code, and the extent will
;				be unmapped after the current transaction is written to disk.
;
; Input Arguments:
;	hfsmp			- The volume containing the allocation blocks.
;	startingBlock	- The first allocation block of the extent being freed.
;	numBlocks		- The number of allocation blocks of the extent being freed.
;________________________________________________________________________________
*/
static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	u_int64_t device_sz;
	int err = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (ALLOC_DEBUG) {
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
		}
	}

	if (hfsmp->jnl != NULL) {
		device_sz = hfsmp->hfs_logical_bytes;
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		/* Validate that the trim is in a valid range of bytes */
		if ((offset >= device_sz) || ((offset + length) > device_sz)) {
			printf("hfs_unmap_free_ext: ignoring trim @ off %lld len %lld\n", offset, length);
			err = EINVAL;
		}

		if (err == 0) {
			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
			if (err) {
				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent\n", err);
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
}



/*
;________________________________________________________________________________
;
; Routine:		hfs_track_unmap_blocks
;
; Function:		Make note of a range of allocation blocks that should be
;				unmapped (trimmed).  That is, the given range of blocks no
;				longer have useful content, and the device can unmap the
;				previous contents.  For example, a solid state disk may reuse
;				the underlying storage for other blocks.
;
;				This routine is only supported for journaled volumes.
;
;			*****NOTE*****:
;			This function should *NOT* be used when the volume is fully
;			mounted.  This function is intended to support a bitmap iteration
;			at mount time to fully inform the SSD driver of the state of all blocks
;			at mount time, and assumes that there is no allocation/deallocation
;			interference during its iteration.
;
; Input Arguments:
;	hfsmp		- The volume containing the allocation blocks.
;	start		- The first allocation block of the extent being freed.
;	numBlocks	- The number of allocation blocks of the extent being freed.
;	list		- The list of currently tracked trim ranges.
;________________________________________________________________________________
*/
static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
		u_int32_t numBlocks, struct jnl_trim_list *list) {

	u_int64_t offset;
	u_int64_t length;
	int error = 0;

	if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
		int extent_no = list->extent_count;
		offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		list->extents[extent_no].offset = offset;
		list->extents[extent_no].length = length;
		list->extent_count++;
		if (list->extent_count == list->allocated_count) {
			error = hfs_issue_unmap (hfsmp, list);
		}
	}

	return error;
}

/*
;________________________________________________________________________________
;
; Routine:		hfs_issue_unmap
;
; Function:		Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
;
; Input Arguments:
;	hfsmp	- The volume containing the allocation blocks.
;	list	- The list of currently tracked trim ranges.
;________________________________________________________________________________
*/

static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) {
	dk_unmap_t unmap;
	int error = 0;

	if (list->extent_count > 0) {
		bzero(&unmap, sizeof(unmap));
		unmap.extents = list->extents;
		unmap.extentsCount = list->extent_count;

		/* Issue a TRIM and flush them out */
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());

		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
		list->extent_count = 0;
	}
	return error;
}



/*
;________________________________________________________________________________
;
; Routine:		hfs_unmap_alloc_extent
;
; Function:		Make note that a range of allocation blocks, some of
;				which may have previously been passed to hfs_unmap_free_extent,
;				is now in use on the volume.  The given blocks will be removed
;				from any pending DKIOCUNMAP.
;
; Input Arguments:
;	hfsmp			- The volume containing the allocation blocks.
;	startingBlock	- The first allocation block of the extent being allocated.
;	numBlocks		- The number of allocation blocks being allocated.
;________________________________________________________________________________
*/
static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	int err = 0;	/* initialized: it is logged below even when there is no journal */

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	if (hfsmp->jnl != NULL) {
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
		if (err) {
			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent\n", err);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
}


/*
;________________________________________________________________________________
;
; Routine:		hfs_trim_callback
;
; Function:		This function is called when a transaction that freed extents
;				(via hfs_unmap_free_extent/journal_trim_add_extent) has been
;				written to the on-disk journal.  This routine will add those
;				extents to the free extent cache so that they can be reused.
;
;				CAUTION: This routine is called while the journal's trim lock
;				is held shared, so that no other thread can reuse any portion
;				of those extents.  We must be very careful about which locks
;				we take from within this callback, to avoid deadlock.  The
;				call to add_free_extent_cache will end up taking the cache's
;				lock (just long enough to add these extents to the cache).
;
;				CAUTION: If the journal becomes invalid (e.g., due to an I/O
;				error when trying to write to the journal), this callback
;				will stop getting called, even if extents got freed before
;				the journal became invalid!
;
; Input Arguments:
;	arg				- The hfsmount of the volume containing the extents.
;	extent_count	- The number of extents freed in the transaction.
;	extents			- An array of extents (byte ranges) that were freed.
;________________________________________________________________________________
*/
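/*
 * Worked example (editor's illustration, not from the original source):
 * with hfsPlusIOPosOffset == 0 and a 4096-byte allocation block, a freed
 * byte extent { offset = 40960, length = 8192 } converts back to
 * startBlock 10, numBlocks 2 in the loop below.
 */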
__private_extern__ void
hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
{
	uint32_t i;
	uint32_t startBlock, numBlocks;
	struct hfsmount *hfsmp = arg;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);

	for (i=0; i<extent_count; ++i) {
		/* Convert the byte range in *extents back to a range of allocation blocks. */
		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
		numBlocks = extents[i].length / hfsmp->blockSize;
		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
}


/*
;________________________________________________________________________________
;
; Routine:		UnmapBlocks
;
; Function:		Traverse the bitmap, and issue DKIOCUNMAPs to the underlying
;				device as needed so that the underlying disk device is as
;				up-to-date as possible with which blocks are unmapped.
;
; Input Arguments:
;	hfsmp	- The volume containing the allocation blocks.
;________________________________________________________________________________
*/

__private_extern__
u_int32_t UnmapBlocks (struct hfsmount *hfsmp) {
	u_int32_t blocks_scanned = 0;
	int error = 0;
	struct jnl_trim_list trimlist;

	/*
	 *struct jnl_trim_list {
	 uint32_t	allocated_count;
	 uint32_t	extent_count;
	 dk_extent_t *extents;
	 };
	 */
	bzero (&trimlist, sizeof(trimlist));
	if (CONFIG_HFS_TRIM) {
		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
		void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
		if (extents == NULL) {
			return ENOMEM;
		}
		trimlist.extents = (dk_extent_t*)extents;
		trimlist.allocated_count = alloc_count;
		trimlist.extent_count = 0;

		while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
			error = hfs_alloc_scan_block (hfsmp, blocks_scanned, hfsmp->totalBlocks,
					&blocks_scanned, &trimlist);
			if (error) {
				printf("HFS: bitmap unmap scan error: %d\n", error);
				break;
			}
		}
		if (error == 0) {
			hfs_issue_unmap(hfsmp, &trimlist);
		}
		if (trimlist.extents) {
			kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
		}
	}
	return error;
}

/*
;________________________________________________________________________________
;
; Routine:	   BlockAllocate
;
; Function:	   Allocate space on a volume.  If contiguous allocation is requested,
;			   at least the requested number of blocks will be allocated or an
;			   error will be returned.  If contiguous allocation is not forced,
;			   the space will be allocated with the first largest extent available
;			   at the requested starting allocation block.  If there is not enough
;			   room there, a block allocation of less than the requested size will be
;			   allocated.
;
;			   If the requested starting block is 0 (for new file allocations),
;			   the volume's allocation block pointer will be used as a starting
;			   point.
;
; Input Arguments:
;	vcb				- Pointer to ExtendedVCB for the volume to allocate space on
;	fcb				- Pointer to FCB for the file for which storage is being allocated
;	startingBlock	- Preferred starting allocation block, 0 = no preference
;	minBlocks		- Number of blocks requested.  If the allocation is non-contiguous,
;					  less than this may actually be allocated
;	maxBlocks		- The maximum number of blocks to allocate.  If there is additional free
;					  space after minBlocks, then up to maxBlocks blocks should really
;					  be allocated.  (Used by ExtendFileC to round up allocations to a multiple
;					  of the file's clump size.)
;	flags			- Flags to specify options like contiguous, use metadata zone,
;					  skip free block check, etc.
;
; Output:
;	(result)		- Error code, zero for successful allocation
;	*startBlock		- Actual starting allocation block
;	*actualBlocks	- Actual number of allocation blocks allocated
;
; Side effects:
;	The volume bitmap is read and updated; the volume bitmap cache may be changed.
;________________________________________________________________________________
*/
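/*
 * Illustrative call (editor's sketch, not from the original source):
 *
 *	u_int32_t start, count;
 *	OSErr err = BlockAllocate(vcb, 0, minBlocks, maxBlocks,
 *				  HFS_ALLOC_FORCECONTIG, &start, &count);
 *
 * With HFS_ALLOC_FORCECONTIG set, either at least minBlocks contiguous
 * blocks are allocated or dskFulErr is returned; without it, the call
 * may return fewer than minBlocks.
 */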
OSErr BlockAllocate (
	ExtendedVCB		*vcb,				/* which volume to allocate space on */
	u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
	u_int32_t		minBlocks,			/* desired number of blocks to allocate */
	u_int32_t		maxBlocks,			/* maximum number of blocks to allocate */
	u_int32_t		flags,				/* option flags */
	u_int32_t		*actualStartBlock,	/* actual first block of allocation */
	u_int32_t		*actualNumBlocks)	/* number of blocks actually allocated; if forceContiguous */
										/* was zero, then this may represent fewer than minBlocks */
{
	u_int32_t	freeBlocks;
	OSErr		err;
	Boolean		updateAllocPtr = false;		//	true if nextAllocation needs to be updated
	struct hfsmount	*hfsmp;
	Boolean useMetaZone;
	Boolean forceContiguous;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}

	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	//TODO: Figure out when we need to re-enable the RB-Tree.

	//TODO: Make sure we use allocLimit when appropriate.

	/*
	 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
	 * in conjunction with the Generate Tree function.  If the red-black tree does not currently contain
	 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
	 * we find what we need.  We'll update the tree fields when we're done, indicating that we've advanced the
	 * high water mark for the tree.
	 */

	//
	//	Initialize outputs in case we get an error
	//
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);

	/* Skip free block check if blocks are being allocated for relocating
	 * data when truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful.  This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		//	If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	//
	//	If caller didn't specify a starting block number, then use the volume's
	//	next block to allocate from.
	//
	if (startingBlock == 0) {
		HFS_MOUNT_LOCK(vcb, TRUE);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		}
		else {
			startingBlock = vcb->nextAllocation;
		}
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		updateAllocPtr = true;
	}

	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	//	If the request must be contiguous, then find a sequence of free blocks
	//	that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
				useMetaZone, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
				(*actualStartBlock > startingBlock) &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
#if CONFIG_HFS_ALLOC_RBTREE
		/*
		 * If the RB-Tree Allocator is live, just go straight for a
		 * BlockAllocateAny call and return the result.  Otherwise,
		 * resort to the bitmap scanner.
		 */
		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
			/* Start by trying to allocate from the starting block forward */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, useMetaZone, actualStartBlock,
					actualNumBlocks);

			/*
			 * Because the RB-Tree is live, the previous call to BlockAllocateAny
			 * will use the rbtree variant.  As a result, it will automatically search the
			 * metadata zone for a valid extent if needed.  If we get a return value of
			 * noErr, we found a valid extent and we can skip to the end.  If the error indicates
			 * the disk is full, that's an equally valid return code and we can skip to the end, too.
			 */
			if (err == noErr || err == dskFulErr) {
				goto Exit;
			}
			else {
				//TODO: only tear down tree if the tree is finished building.
				//Make sure to handle the ENOSPC condition properly.  We shouldn't error out in that case.
				/* Tear down tree if we encounter an error */
				if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
					hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
					DestroyTrees(hfsmp);
					ResetVCBFreeExtCache(hfsmp);
				}
				else {
					goto Exit;
				}
				// fall through to the normal allocation since the rb-tree allocation failed.
			}
		}
#endif

		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
					maxBlocks, useMetaZone, actualStartBlock,
					actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * We may be out of space in the normal zone; go up to the starting block from
			 * the start of the volume.
			 */
			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
					useMetaZone, actualStartBlock,
					actualNumBlocks);
		}
	}

Exit:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (*actualNumBlocks != 0) {
		//
		//	If we used the volume's roving allocation pointer, then we need to update it.
		//	Adding in the length of the current allocation might reduce the next allocate
		//	call by avoiding a re-scan of the already allocated space.  However, the clump
		//	just allocated can quite conceivably end up being truncated or released when
		//	the file is closed or its EOF changed.  Leaving the allocation pointer at the
		//	start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		HFS_MOUNT_LOCK(vcb, TRUE);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		if (updateAllocPtr &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/*
		 * Update the number of free blocks on the volume
		 *
		 * Skip updating the free blocks count if the blocks are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		HFS_MOUNT_UNLOCK(vcb, TRUE);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}

	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}

			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}


/*
;________________________________________________________________________________
;
; Routine:	   BlockDeallocate
;
; Function:	   Update the bitmap to deallocate a run of disk allocation blocks
;
; Input Arguments:
;	vcb			- Pointer to ExtendedVCB for the volume to free space on
;	firstBlock	- First allocation block to be freed
;	numBlocks	- Number of allocation blocks to free up (must be > 0!)
;
; Output:
;	(result)	- Result code
;
; Side effects:
;	The volume bitmap is read and updated; the volume bitmap cache may be changed.
;	The Allocator's red-black trees may also be modified as a result.
;________________________________________________________________________________
*/

OSErr BlockDeallocate (
	ExtendedVCB		*vcb,			//	Which volume to deallocate space on
	u_int32_t		firstBlock,		//	First block in range to deallocate
	u_int32_t		numBlocks,		//	Number of contiguous blocks to deallocate
	u_int32_t		flags)
{
	OSErr			err;
	struct hfsmount	*hfsmp;
	hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	//
	//	If no blocks to deallocate, then exit early
	//
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}

	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}

		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}
	}

	/*
	 * If we're using the red-black tree code, then try to free the
	 * blocks by marking them in the red-black tree first.  If the tree
	 * is not active for whatever reason (or we're not using the
	 * R/B Tree code at all), then go straight for the BlockMarkFree
	 * function.
	 *
	 * Remember that we can get into this function if the tree isn't finished
	 * building.  In that case, check to see if the block we're de-allocating is
	 * past the high watermark.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		/*
		 * BlockMarkFreeRBTree deals with the case where we are resizing the
		 * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion
		 * that is currently controlled by the r/b tree.
		 */

		//TODO: Update multiplexing code for the half-finished case.
		err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
	}
	else {
		err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
	}

#else
	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
#endif
	if (err)
		goto Exit;

	//
	//	Update the volume's free block count, and mark the VCB as dirty.
	//
	HFS_MOUNT_LOCK(vcb, TRUE);

	/*
	 * Do not update the free block count.  This flag is specified
	 * when a volume is being truncated.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);

		/*
		 * In the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache).  Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}

	MarkVCBDirty(vcb);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}


u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
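/*
 * Example (editor's note): each entry is the number of 0 (free) bits in
 * the corresponding 4-bit nibble.  Byte 0xA5 = binary 1010 0101, so its
 * free-bit count is freebitcount[0x5] + freebitcount[0xA] = 2 + 2 = 4.
 * MetaZoneFreeBlocks() below tallies free blocks one nibble at a time
 * this way.
 */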

u_int32_t
MetaZoneFreeBlocks(ExtendedVCB *vcb)
{
	u_int32_t freeblocks;
	u_int32_t *currCache;
	uintptr_t blockRef;
	u_int32_t bit;
	u_int32_t lastbit;
	int bytesleft;
	int bytesperblock;
	u_int8_t byte;
	u_int8_t *buffer;

	blockRef = 0;
	bytesleft = freeblocks = 0;
	buffer = NULL;
	bit = VCBTOHFS(vcb)->hfs_metazone_start;
	if (bit == 1)
		bit = 0;

	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
	bytesperblock = vcb->vcbVBMIOSize;

	/*
	 *  Count all the bits from bit to lastbit.
	 */
	while (bit < lastbit) {
		/*
		 *  Get next bitmap block.
		 */
		if (bytesleft == 0) {
			if (blockRef) {
				(void) ReleaseBitmapBlock(vcb, blockRef, false);
				blockRef = 0;
			}
			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
				return (0);
			}
			buffer = (u_int8_t *)currCache;
			bytesleft = bytesperblock;
		}
		byte = *buffer++;
		freeblocks += freebitcount[byte & 0x0F];
		freeblocks += freebitcount[(byte >> 4) & 0x0F];
		bit += kBitsPerByte;
		--bytesleft;
	}
	if (blockRef)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	return (freeblocks);
}


/*
 * Obtain the next allocation block (bit) that's
 * outside the metadata allocation zone.
 */
static u_int32_t NextBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit)
{
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
		return (bit);
	/*
	 * Skip over metadata allocation zone.
	 */
	if ((bit >= hfsmp->hfs_metazone_start) &&
	    (bit <= hfsmp->hfs_metazone_end)) {
		bit = hfsmp->hfs_metazone_end + 1;
	}
	return (bit);
}
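
/*
 * Example (editor's note): with a metadata zone spanning allocation
 * blocks [hfs_metazone_start, hfs_metazone_end] == [100, 5000],
 * NextBitmapBlock(vcb, 3000) returns 5001, while any bit outside the
 * zone is returned unchanged.
 */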


/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;			block (bit).  Return a pointer to the bitmap block.
;
; Inputs:
;	vcb			--	Pointer to ExtendedVCB
;	bit			--	Allocation block whose bitmap block is desired
;
; Outputs:
;	buffer		--	Pointer to bitmap block corresponding to "block"
;	blockRef
;_______________________________________________________________________
*/
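/*
 * Example (editor's note): with vcbVBMIOSize == 4096 bytes, each bitmap
 * block covers 4096 * 8 = 32768 allocation blocks, so the bit for
 * allocation block 100000 lives in bitmap block 100000 / 32768 = 3,
 * matching the computation of "block" below.
 */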
static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef)
{
	OSErr			err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	if (vcb->vcbSigWord == kHFSPlusSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */

	} else /* hfs */ {
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}


/*
;_______________________________________________________________________
;
; Routine:	ReleaseBitmapBlock
;
; Function:	Release a bitmap block.
;
; Inputs:
;	vcb
;	blockRef
;	dirty
;_______________________________________________________________________
*/
static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty)
{
	struct buf *bp = (struct buf *)blockRef;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);

	if (blockRef == 0) {
		if (dirty)
			panic("hfs: ReleaseBitmapBlock: missing bp");
		return (0);
	}

	if (bp) {
		if (dirty) {
			// XXXdbg
			struct hfsmount *hfsmp = VCBTOHFS(vcb);

			if (hfsmp->jnl) {
				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
			} else {
				buf_bdwrite(bp);
			}
		} else {
			buf_brelse(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return (0);
}

/*
 * ReleaseScanBitmapBlock is used to release struct bufs that were
 * created for use by bitmap scanning code.  We want to force
 * them to be purged out of the buffer cache ASAP, so we'll release them differently
 * than in the ReleaseBitmapBlock case.  Additionally, we know that we're only reading
 * the blocks, so we will never dirty them as part of the tree building scan.
 */

static OSErr ReleaseScanBitmapBlock(struct buf *bp ) {

	if (bp == NULL) {
		return (0);
	}

	/* Mark the buffer invalid if it isn't locked, then release it */
	if ((buf_flags(bp) & B_LOCKED) == 0) {
		buf_markinvalid(bp);
	}
	buf_brelse(bp);

	return (0);
}

/*
_______________________________________________________________________

Routine:	BlockAllocateContig

Function:	Allocate a contiguous group of allocation blocks.  The
			allocation is all-or-nothing.  The caller guarantees that
			there are enough free blocks (though they may not be
			contiguous, in which case this call will fail).

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block for allocation
	minBlocks		Minimum number of contiguous blocks to allocate
	maxBlocks		Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/
static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone,
				actualStartBlock, actualNumBlocks, 1);
	}
#endif
	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
}

/*
 * Variant of BlockAllocateContig that uses the original bitmap-searching logic
 */

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	//
	//	Find a contiguous group of blocks at least minBlocks long.
	//	Determine the number of contiguous blocks available (up
	//	to maxBlocks).
	//

	/*
	 * NOTE: If the only contiguous free extent of at least minBlocks
	 * crosses startingBlock (i.e. starts before, ends after), then we
	 * won't find it.  Earlier versions *did* find this case by letting
	 * the second search look past startingBlock by minBlocks.  But
	 * with the free extent cache, this can lead to duplicate entries
	 * in the cache, causing the same blocks to be allocated twice.
	 */
	err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	if (err == dskFulErr && startingBlock != 0) {
		/*
		 * Constrain the endingBlock so we don't bother looking for ranges
		 * that would overlap those found in the previous call.
		 */
		err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
				useMetaZone, actualStartBlock, actualNumBlocks);
	}
	//
	//	Now mark those blocks allocated.
	//
	if (err == noErr)
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Variant of BlockAllocateContig that uses the newer red-black tree library
 * in order to manage free space extents.  This will search the red-black tree
 * and return results in the same fashion as BlockAllocateContigBitmap.
 *
 * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
 * as well as BlockAllocateContig.  In order to determine when we should vend contiguous chunks over
 * locality-based-searches, we use the forceContig argument to determine who called us.
 */
1572
1573 static OSErr BlockAllocateContigRBTree(
1574 ExtendedVCB *vcb,
1575 u_int32_t startingBlock,
1576 u_int32_t minBlocks,
1577 u_int32_t maxBlocks,
1578 Boolean useMetaZone,
1579 u_int32_t *actualStartBlock,
1580 u_int32_t *actualNumBlocks,
1581 u_int32_t forceContig)
1582 {
1583 OSErr err;
1584 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1585 extent_node_t search_sentinel;
1586 extent_node_t *node = NULL;
1587 extent_node_t tempnode;
1588
1589 bzero (&tempnode, sizeof(extent_node_t));
1590
1591 /* Begin search at the end of the file, via startingBlock */
1592 memset (&search_sentinel, 0, sizeof(extent_node_t));
1593 search_sentinel.offset = startingBlock;
1594
1595 *actualStartBlock = 0;
1596 *actualNumBlocks = 0;
1597
1598 /*
1599 * Find the first available extent that satifies the allocation by searching
1600 * from the starting point and moving forward
1601 */
1602 node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
1603
1604 if (node) {
1605 *actualStartBlock = node->offset;
1606 *actualNumBlocks = node->length;
1607 }
1608
1609 /* If we managed to grab at least minBlocks of space, then we're done. */
1610
1611 if (*actualNumBlocks >= minBlocks) {
1612 if (*actualNumBlocks > maxBlocks) {
1613 *actualNumBlocks = maxBlocks;
1614 }
1615
1616
1617 /* Check to see if blocks are already marked as in-use */
1618 if (ALLOC_DEBUG) {
1619 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1620 if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
1621 printf("bad node: %p, offset %d, length %d\n", node, node->offset, node->length);
1622 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
1623 *actualStartBlock, *actualNumBlocks);
1624 }
1625 }
1626
1627 /*
1628 * BlockMarkAllocatedRBTree is responsible for removing the nodes
1629 * from the red-black tree after the bitmap has been updated on-disk.
1630 */
1631 err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
1632 if (err == noErr) {
1633
1634 if ( ALLOC_DEBUG ) {
1635 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1636 if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
1637 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
1638 *actualStartBlock, *actualNumBlocks);
1639 }
1640 check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
1641 }
1642
1643 return err;
1644 }
1645 }
1646
1647 /*
1648 * We may have failed to grow at the end of the file. We'll try to find
1649 * appropriate free extents, searching by size in the normal allocation zone.
1650 *
1651 * However, if we're allocating on behalf of a sparse device that hasn't explicitly
1652 * requested a contiguous chunk, then we try to search by offset, even if it
1653 * means fragmenting the file. We want all available entries starting
1654 * from the front of the disk to avoid creating new band files. As a result,
1655 * we'll start by searching the offset tree rather than the normal length
1656 * tree. Note that this function can be invoked from BlockAllocateAny, in
1657 * which case the minimum request size is a single block, making it easy to succeed.
1658 */
1659 search_sentinel.offset = hfsmp->hfs_metazone_end;
1660 search_sentinel.length = minBlocks;
1661
1662 if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) {
1663 /* just start with the first offset node */
1664 node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
1665 }
1666 else {
1667 /*
1668 * Otherwise, start from the end of the metadata zone or our next allocation pointer,
1669 * and try to find the first chunk of size >= min.
1670 */
1671 node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
1672
1673 if (node == NULL) {
1674 extent_node_t *metaend_node;
1675 /*
1676 * Maybe there's a free extent coalesced with the space still in the metadata
1677 * zone. If there is, find it and allocate from the middle of it, starting at
1678 * the end of the metadata zone.
1679 *
1680 * If search_prev yields a node whose offset is not metazone_end, then no node
1681 * existed at exactly that offset. If that previous node's offset + length crosses
1682 * the metazone boundary, then allocate from the portion past the boundary. If it
1683 * is too small to cross the metazone boundary, then it is of no use to us and
1684 * we'd have to report ENOSPC.
1685 */
1686 metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
1687
1688 if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) {
1689 u_int32_t node_end = metaend_node->offset + metaend_node->length;
1690 if (node_end > hfsmp->hfs_metazone_end) {
1691 u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end;
1692 if (modified_length >= minBlocks) {
1693 /*
1694 * Then we can allocate it. Fill in the contents into tempnode,
1695 * and BlockMarkAllocatedRBTree below will take care of the rest.
1696 */
1697 tempnode.offset = hfsmp->hfs_metazone_end;
1698 tempnode.length = MIN(minBlocks, node_end - tempnode.offset);
1699 node = &tempnode;
1700 }
1701 }
1702 }
1703 }
1704 }
1705
1706 /* If we can't find anything useful, search the metadata zone as a last resort. */
1707
1708 if ((!node) && useMetaZone) {
1709 search_sentinel.offset = 0;
1710 search_sentinel.length = minBlocks;
1711 node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
1712 }
1713
1714 /* If we found something useful, then go ahead and update the bitmap */
1715 if ((node) && (node->length >= minBlocks)) {
1716 *actualStartBlock = node->offset;
1717 if (node->length >= maxBlocks) {
1718 *actualNumBlocks = maxBlocks;
1719 }
1720 else {
1721 *actualNumBlocks = node->length;
1722 }
1723
1724 err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
1725
1726 if (err == noErr) {
1727 if ( ALLOC_DEBUG ) {
1728 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1729 if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
1730 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
1731 *actualStartBlock, *actualNumBlocks);
1732 }
1733 check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
1734 }
1735 }
1736 }
1737 else {
1738 int destroy_trees = 0;
1739 /*
1740 * TODO: Add High-water mark check here. If we couldn't find anything useful,
1741 * when do we tear down the tree? Or should the logic be in BlockAllocateContig??
1742 */
1743 if (destroy_trees) {
1744 DestroyTrees(VCBTOHFS(vcb));
1745 /* Reset the Free Ext Cache since we'll be using it now. */
1746 ResetVCBFreeExtCache(VCBTOHFS(vcb));
1747 }
1748
1749 if (ALLOC_DEBUG) {
1750 printf("HFS allocator: No space on FS (%s). Node %p Start %d Min %d, Max %d, Tree still alive.\n",
1751 hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks);
1752
1753 /* Dump the list ? */
1754 extent_tree_offset_print(&hfsmp->offset_tree);
1755
1756 printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n",
1757 hfsmp->vcbVN, minBlocks, maxBlocks);
1758
1759
1760
1761 }
1762 err = dskFulErr;
1763 }
1764
1765 if (err == noErr) {
1766 if (ALLOC_DEBUG) {
1767 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
1768 panic("hfs: BlockAllocateContigRBTree: allocation overflow on \"%s\"", vcb->vcbVN);
1769 }
1770 }
1771 else {
1772 *actualStartBlock = 0;
1773 *actualNumBlocks = 0;
1774 }
1775
1776 return err;
1777
1778 }
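/*
 * A minimal sketch of the sentinel-based lookup pattern used above. The
 * sentinel is a stack-allocated extent_node_t whose offset/length fields
 * seed the search; the tree routines return the first real node at or
 * beyond it. The helper name below is hypothetical and for illustration
 * only.
 */
#if 0 /* illustrative only */
static extent_node_t *
sketch_first_fit_after(struct hfsmount *hfsmp, u_int32_t offset, u_int32_t min_len)
{
	extent_node_t sentinel;

	memset(&sentinel, 0, sizeof(sentinel));
	sentinel.offset = offset;	/* search from here forward */
	sentinel.length = min_len;	/* require at least this many blocks */

	/* NULL if no free extent of at least min_len exists at or past offset */
	return extent_tree_off_search_nextWithSize(&hfsmp->offset_tree, &sentinel);
}
#endif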
1779 #endif
1780
1781
1782
1783 /*
1784 _______________________________________________________________________
1785
1786 Routine: BlockAllocateAny
1787
1788 Function: Allocate one or more allocation blocks. If there are fewer
1789 free blocks than requested, all free blocks will be
1790 allocated. The caller guarantees that there is at least
1791 one free block.
1792
1793 Inputs:
1794 vcb Pointer to volume where space is to be allocated
1795 startingBlock Preferred first block for allocation
1796 endingBlock Last block to check + 1
1797 maxBlocks Maximum number of contiguous blocks to allocate
1798 useMetaZone
1799
1800 Outputs:
1801 actualStartBlock First block of range allocated, or 0 if error
1802 actualNumBlocks Number of blocks allocated, or 0 if error
1803 _______________________________________________________________________
1804 */
1805
1806 /*
1807 * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree
1808 * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic.
1809 */
1810
1811 static OSErr BlockAllocateAny(
1812 ExtendedVCB *vcb,
1813 u_int32_t startingBlock,
1814 register u_int32_t endingBlock,
1815 u_int32_t maxBlocks,
1816 Boolean useMetaZone,
1817 u_int32_t *actualStartBlock,
1818 u_int32_t *actualNumBlocks)
1819 {
1820
1821 #if CONFIG_HFS_ALLOC_RBTREE
1822 if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
1823 return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
1824 }
1825 #endif
1826 return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
1827
1828 }
1829
1830
1831 #if CONFIG_HFS_ALLOC_RBTREE
1832 /*
1833 * BlockAllocateAnyRBTree finds one or more allocation blocks by using
1834 * the red-black allocation tree to figure out where the free ranges are.
1835 * This function is typically used as a last resort because we were unable to
1836 * find the right ranges. Outputs are the same as BlockAllocateAnyBitmap.
1837 */
1838 static OSErr BlockAllocateAnyRBTree(
1839 ExtendedVCB *vcb,
1840 u_int32_t startingBlock,
1841 u_int32_t maxBlocks,
1842 Boolean useMetaZone,
1843 u_int32_t *actualStartBlock,
1844 u_int32_t *actualNumBlocks)
1845 {
1846 OSErr err;
1847
1848 /*
1849 * If we're using the red-black tree, try searching at the
1850 * specified offsets with a minimum request of a single block.
1851 */
1852 err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone,
1853 actualStartBlock, actualNumBlocks, 0);
1854 return err;
1855
1856 }
1857 #endif
1858
1859 /*
1860 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
1861 * where the free allocation blocks are. Inputs and outputs are the same as for
1862 * BlockAllocateAny and BlockAllocateAnyRBTree
1863 */
1864
1865 static OSErr BlockAllocateAnyBitmap(
1866 ExtendedVCB *vcb,
1867 u_int32_t startingBlock,
1868 register u_int32_t endingBlock,
1869 u_int32_t maxBlocks,
1870 Boolean useMetaZone,
1871 u_int32_t *actualStartBlock,
1872 u_int32_t *actualNumBlocks)
1873 {
1874 OSErr err;
1875 register u_int32_t block; // current block number
1876 register u_int32_t currentWord; // Pointer to current word within bitmap block
1877 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
1878 register u_int32_t wordsLeft; // Number of words left in this bitmap block
1879 u_int32_t *buffer = NULL;
1880 u_int32_t *currCache = NULL;
1881 uintptr_t blockRef;
1882 u_int32_t bitsPerBlock;
1883 u_int32_t wordsPerBlock;
1884 Boolean dirty = false;
1885 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1886
1887 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1888 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
1889
1890 /*
1891 * When we're skipping the metadata zone and the start/end
1892 * range overlaps with the metadata zone then adjust the
1893 * start to be outside of the metadata zone. If the range
1894 * is entirely inside the metadata zone then we can deny the
1895 * request (dskFulErr).
1896 */
1897 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
1898 if (startingBlock <= vcb->hfs_metazone_end) {
1899 if (endingBlock > (vcb->hfs_metazone_end + 2))
1900 startingBlock = vcb->hfs_metazone_end + 1;
1901 else {
1902 err = dskFulErr;
1903 goto Exit;
1904 }
1905 }
1906 }
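/*
 * For example, with hfs_metazone_end == 1023: a request starting at block
 * 10 with endingBlock == 4096 is bumped to start at block 1024, while a
 * request that sits entirely inside the zone (say endingBlock == 1025,
 * which is not past metazone_end + 2) is denied with dskFulErr rather
 * than being allowed to consume metadata-zone blocks.
 */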
1907
1908 // Since this routine doesn't wrap around
1909 if (maxBlocks > (endingBlock - startingBlock)) {
1910 maxBlocks = endingBlock - startingBlock;
1911 }
1912
1913 //
1914 // Pre-read the first bitmap block
1915 //
1916 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
1917 if (err != noErr) goto Exit;
1918 buffer = currCache;
1919
1920 //
1921 // Set up the current position within the block
1922 //
1923 {
1924 u_int32_t wordIndexInBlock;
1925
1926 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
1927 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1928
1929 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1930 buffer += wordIndexInBlock;
1931 wordsLeft = wordsPerBlock - wordIndexInBlock;
1932 currentWord = SWAP_BE32 (*buffer);
1933 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
1934 }
1935
1936 //
1937 // Find the first unallocated block
1938 //
1939 block=startingBlock;
1940 while (block < endingBlock) {
1941 if ((currentWord & bitMask) == 0)
1942 break;
1943
1944 // Next bit
1945 ++block;
1946 bitMask >>= 1;
1947 if (bitMask == 0) {
1948 // Next word
1949 bitMask = kHighBitInWordMask;
1950 ++buffer;
1951
1952 if (--wordsLeft == 0) {
1953 // Next block
1954 buffer = currCache = NULL;
1955 err = ReleaseBitmapBlock(vcb, blockRef, false);
1956 if (err != noErr) goto Exit;
1957
1958 /*
1959 * Skip over metadata blocks.
1960 */
1961 if (!useMetaZone) {
1962 block = NextBitmapBlock(vcb, block);
1963 }
1964 if (block >= endingBlock) {
1965 err = dskFulErr;
1966 goto Exit;
1967 }
1968
1969 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
1970 if (err != noErr) goto Exit;
1971 buffer = currCache;
1972
1973 wordsLeft = wordsPerBlock;
1974 }
1975 currentWord = SWAP_BE32 (*buffer);
1976 }
1977 }
1978
1979 // Did we get to the end of the bitmap before finding a free block?
1980 // If so, then couldn't allocate anything.
1981 if (block >= endingBlock) {
1982 err = dskFulErr;
1983 goto Exit;
1984 }
1985
1986 // Return the first block in the allocated range
1987 *actualStartBlock = block;
1988 dirty = true;
1989
1990 // If we could get the desired number of blocks before hitting endingBlock,
1991 // then adjust endingBlock so we won't keep looking. Ideally, the comparison
1992 // would be (block + maxBlocks) < endingBlock, but that could overflow. The
1993 // comparison below yields identical results, but without overflow.
1994 if (block < (endingBlock-maxBlocks)) {
1995 endingBlock = block + maxBlocks; // if we get this far, we've found enough
1996 }
1997
1998 // XXXdbg
1999 if (hfsmp->jnl) {
2000 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2001 }
2002
2003 //
2004 // Allocate all of the consecutive blocks
2005 //
2006 while ((currentWord & bitMask) == 0) {
2007 // Allocate this block
2008 currentWord |= bitMask;
2009
2010 // Move to the next block. If no more, then exit.
2011 ++block;
2012 if (block == endingBlock)
2013 break;
2014
2015 // Next bit
2016 bitMask >>= 1;
2017 if (bitMask == 0) {
2018 *buffer = SWAP_BE32 (currentWord); // update value in bitmap
2019
2020 // Next word
2021 bitMask = kHighBitInWordMask;
2022 ++buffer;
2023
2024 if (--wordsLeft == 0) {
2025 // Next block
2026 buffer = currCache = NULL;
2027 err = ReleaseBitmapBlock(vcb, blockRef, true);
2028 if (err != noErr) goto Exit;
2029
2030 /*
2031 * Skip over metadata blocks.
2032 */
2033 if (!useMetaZone) {
2034 u_int32_t nextBlock;
2035
2036 nextBlock = NextBitmapBlock(vcb, block);
2037 if (nextBlock != block) {
2038 goto Exit; /* allocation gap, so stop */
2039 }
2040 }
2041
2042 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
2043 if (err != noErr) goto Exit;
2044 buffer = currCache;
2045
2046 // XXXdbg
2047 if (hfsmp->jnl) {
2048 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2049 }
2050
2051 wordsLeft = wordsPerBlock;
2052 }
2053
2054 currentWord = SWAP_BE32 (*buffer);
2055 }
2056 }
2057 *buffer = SWAP_BE32 (currentWord); // update the last change
2058
2059 Exit:
2060 if (err == noErr) {
2061 *actualNumBlocks = block - *actualStartBlock;
2062
2063 // sanity check
2064 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
2065 panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
2066 }
2067
2068 /*
2069 * Beware!
2070 * Because this function directly manipulates the bitmap to mark the
2071 * blocks it came across as allocated, we must inform the journal (and
2072 * subsequently, the journal's trim list) that we are allocating these
2073 * blocks, just like in BlockMarkAllocatedInternal. hfs_unmap_alloc_extent
2074 * and the functions it calls will serialize behind the journal trim list lock
2075 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to
2076 * us manipulating the trim list, or we get there first and successfully remove
2077 * these bitmap blocks before the TRIM happens.
2078 */
2079 hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks);
2080 }
2081 else {
2082 *actualStartBlock = 0;
2083 *actualNumBlocks = 0;
2084 }
2085
2086 if (currCache)
2087 (void) ReleaseBitmapBlock(vcb, blockRef, dirty);
2088
2089 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2090 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2091
2092 return err;
2093 }
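/*
 * A minimal sketch of the bit addressing used by the scan above. After
 * SWAP_BE32, bit 0 of a bitmap word is the word's high-order bit, so
 * allocation block N maps to mask (kHighBitInWordMask >> (N % kBitsPerWord)).
 * The helper below is hypothetical and only shows how a single word is
 * scanned for the first clear (free) bit.
 */
#if 0 /* illustrative only */
static int
sketch_first_free_bit(u_int32_t word_be, u_int32_t startBit)
{
	u_int32_t word = SWAP_BE32(word_be);			/* host order */
	u_int32_t mask = kHighBitInWordMask >> startBit;	/* bit for startBit */
	u_int32_t bit;

	for (bit = startBit; bit < kBitsPerWord; ++bit, mask >>= 1) {
		if ((word & mask) == 0)
			return (int)bit;	/* free block within this word */
	}
	return -1;	/* every block covered by this word is allocated */
}
#endif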
2094
2095
2096 /*
2097 _______________________________________________________________________
2098
2099 Routine: BlockAllocateKnown
2100
2101 Function: Try to allocate space from known free space in the free
2102 extent cache.
2103
2104 Inputs:
2105 vcb Pointer to volume where space is to be allocated
2106 maxBlocks Maximum number of contiguous blocks to allocate
2107
2108 Outputs:
2109 actualStartBlock First block of range allocated, or 0 if error
2110 actualNumBlocks Number of blocks allocated, or 0 if error
2111
2112 Returns:
2113 dskFulErr Free extent cache is empty
2114 _______________________________________________________________________
2115 */
2116
2117 static OSErr BlockAllocateKnown(
2118 ExtendedVCB *vcb,
2119 u_int32_t maxBlocks,
2120 u_int32_t *actualStartBlock,
2121 u_int32_t *actualNumBlocks)
2122 {
2123 OSErr err;
2124 u_int32_t foundBlocks;
2125
2126 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2127 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
2128
2129 HFS_MOUNT_LOCK(vcb, TRUE);
2130 lck_spin_lock(&vcb->vcbFreeExtLock);
2131 if ((hfs_isrbtree_active(vcb) == true) ||
2132 vcb->vcbFreeExtCnt == 0 ||
2133 vcb->vcbFreeExt[0].blockCount == 0) {
2134 lck_spin_unlock(&vcb->vcbFreeExtLock);
2135 HFS_MOUNT_UNLOCK(vcb, TRUE);
2136 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2137 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
2138 return dskFulErr;
2139 }
2140 lck_spin_unlock(&vcb->vcbFreeExtLock);
2141 HFS_MOUNT_UNLOCK(vcb, TRUE);
2142
2143 lck_spin_lock(&vcb->vcbFreeExtLock);
2144
2145 // Just grab up to maxBlocks of the first (largest) free extent.
2146 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2147 foundBlocks = vcb->vcbFreeExt[0].blockCount;
2148 if (foundBlocks > maxBlocks)
2149 foundBlocks = maxBlocks;
2150 *actualNumBlocks = foundBlocks;
2151
2152 lck_spin_unlock(&vcb->vcbFreeExtLock);
2153
2154 remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);
2155
2156 // sanity check
2157 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
2158 {
2159 printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2160 hfs_mark_volume_inconsistent(vcb);
2161 *actualStartBlock = 0;
2162 *actualNumBlocks = 0;
2163 err = EIO;
2164 }
2165 else
2166 {
2167 //
2168 // Now mark the found extent in the bitmap
2169 //
2170 err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
2171 }
2172
2173 sanity_check_free_ext(vcb, 0);
2174
2175 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2176 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2177
2178 return err;
2179 }
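/*
 * For example, if the largest cached free extent is { startBlock 5000,
 * blockCount 240 } and the caller asks for maxBlocks == 64, the routine
 * above returns blocks 5000..5063, trims those 64 blocks out of the cache
 * via remove_free_extent_cache, and then sets the corresponding bits with
 * BlockMarkAllocatedInternal.
 */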
2180
2181 /*
2182 * BlockMarkAllocated
2183 *
2184 * This is a wrapper function around the internal calls which will actually mark the blocks
2185 * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
2186 * this logic here to avoid callers having to deal with whether or not the red-black tree
2187 * is enabled.
2188 */
2189
2190
2191 OSErr BlockMarkAllocated(
2192 ExtendedVCB *vcb,
2193 u_int32_t startingBlock,
2194 register u_int32_t numBlocks)
2195 {
2196 struct hfsmount *hfsmp;
2197
2198 hfsmp = VCBTOHFS(vcb);
2199 #if CONFIG_HFS_ALLOC_RBTREE
2200 if (hfs_isrbtree_active(hfsmp)) {
2201 int err;
2202
2203 if ((startingBlock >= hfsmp->offset_block_end) &&
2204 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
2205 /*
2206 * We're manipulating a portion of the bitmap that is not controlled by the
2207 * red-black tree. Just update the bitmap and don't bother manipulating the tree
2208 */
2209 goto justbitmap;
2210 }
2211
2212 err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks);
2213 if (err == noErr) {
2214 if ( ALLOC_DEBUG ) {
2215 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
2216 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2217 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
2218 startingBlock, numBlocks);
2219 }
2220 check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC);
2221 }
2222 }
2223 return err;
2224
2225 }
2226 justbitmap:
2227 #endif
2228
2229 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
2230
2231 }
2232
2233
2234
2235 /*
2236 _______________________________________________________________________
2237
2238 Routine: BlockMarkAllocatedInternal
2239
2240 Function: Mark a contiguous group of blocks as allocated (set in the
2241 bitmap). It assumes those bits are currently marked
2242 deallocated (clear in the bitmap). Note that this function
2243 must be called regardless of whether or not the bitmap or
2244 tree-based allocator is used, as all allocations must correctly
2245 be marked on-disk. If the tree-based approach is running, then
2246 this will be done before the node is removed from the tree.
2247
2248 Inputs:
2249 vcb Pointer to volume where space is to be allocated
2250 startingBlock First block number to mark as allocated
2251 numBlocks Number of blocks to mark as allocated
2252 _______________________________________________________________________
2253 */
2254 static
2255 OSErr BlockMarkAllocatedInternal (
2256 ExtendedVCB *vcb,
2257 u_int32_t startingBlock,
2258 register u_int32_t numBlocks)
2259 {
2260 OSErr err;
2261 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2262 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2263 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2264 u_int32_t firstBit; // Bit index within word of first bit to allocate
2265 u_int32_t numBits; // Number of bits in word to allocate
2266 u_int32_t *buffer = NULL;
2267 uintptr_t blockRef;
2268 u_int32_t bitsPerBlock;
2269 u_int32_t wordsPerBlock;
2270 // XXXdbg
2271 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2272
2273 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2274 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
2275
2276 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
2277
2278 //
2279 // Pre-read the bitmap block containing the first word of allocation
2280 //
2281
2282 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2283 if (err != noErr) goto Exit;
2284 //
2285 // Initialize currentWord, and wordsLeft.
2286 //
2287 {
2288 u_int32_t wordIndexInBlock;
2289
2290 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2291 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2292
2293 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2294 currentWord = buffer + wordIndexInBlock;
2295 wordsLeft = wordsPerBlock - wordIndexInBlock;
2296 }
2297
2298 // XXXdbg
2299 if (hfsmp->jnl) {
2300 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2301 }
2302
2303 //
2304 // If the first block to allocate doesn't start on a word
2305 // boundary in the bitmap, then treat that first word
2306 // specially.
2307 //
2308
2309 firstBit = startingBlock % kBitsPerWord;
2310 if (firstBit != 0) {
2311 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2312 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2313 if (numBits > numBlocks) {
2314 numBits = numBlocks; // entire allocation is inside this one word
2315 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2316 }
2317 #if DEBUG_BUILD
2318 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2319 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2320 }
2321 #endif
2322 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2323 numBlocks -= numBits; // adjust number of blocks left to allocate
2324
2325 ++currentWord; // move to next word
2326 --wordsLeft; // one less word left in this block
2327 }
2328
2329 //
2330 // Allocate whole words (32 blocks) at a time.
2331 //
2332
2333 bitMask = kAllBitsSetInWord; // put this in a register for 68K
2334 while (numBlocks >= kBitsPerWord) {
2335 if (wordsLeft == 0) {
2336 // Read in the next bitmap block
2337 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2338
2339 buffer = NULL;
2340 err = ReleaseBitmapBlock(vcb, blockRef, true);
2341 if (err != noErr) goto Exit;
2342
2343 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2344 if (err != noErr) goto Exit;
2345
2346 // XXXdbg
2347 if (hfsmp->jnl) {
2348 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2349 }
2350
2351 // Readjust currentWord and wordsLeft
2352 currentWord = buffer;
2353 wordsLeft = wordsPerBlock;
2354 }
2355 #if DEBUG_BUILD
2356 if (*currentWord != 0) {
2357 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2358 }
2359 #endif
2360 *currentWord = SWAP_BE32 (bitMask);
2361 numBlocks -= kBitsPerWord;
2362
2363 ++currentWord; // move to next word
2364 --wordsLeft; // one less word left in this block
2365 }
2366
2367 //
2368 // Allocate any remaining blocks.
2369 //
2370
2371 if (numBlocks != 0) {
2372 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2373 if (wordsLeft == 0) {
2374 // Read in the next bitmap block
2375 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2376
2377 buffer = NULL;
2378 err = ReleaseBitmapBlock(vcb, blockRef, true);
2379 if (err != noErr) goto Exit;
2380
2381 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2382 if (err != noErr) goto Exit;
2383
2384 // XXXdbg
2385 if (hfsmp->jnl) {
2386 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2387 }
2388
2389 // Readjust currentWord and wordsLeft
2390 currentWord = buffer;
2391 wordsLeft = wordsPerBlock;
2392 }
2393 #if DEBUG_BUILD
2394 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2395 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2396 }
2397 #endif
2398 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2399
2400 // No need to update currentWord or wordsLeft
2401 }
2402
2403 Exit:
2404
2405 if (buffer)
2406 (void)ReleaseBitmapBlock(vcb, blockRef, true);
2407
2408 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2409 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2410
2411 return err;
2412 }
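/*
 * A worked example of the partial-word masks above: with firstBit == 29
 * (startingBlock % 32) and numBlocks == 2, numBits becomes 2 and
 *
 *	bitMask = (kAllBitsSetInWord >> 29) & ~(kAllBitsSetInWord >> 31)
 *		= 0x00000007 & ~0x00000001
 *		= 0x00000006
 *
 * i.e. exactly the bits for blocks 29 and 30 of that word, which are then
 * ORed in via SWAP_BE32 to preserve the bitmap's big-endian layout.
 */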
2413
2414 #if CONFIG_HFS_ALLOC_RBTREE
2415 /*
2416 * This is a wrapper function around BlockMarkAllocatedInternal. This function is
2417 * called when the RB Tree-based allocator needs to mark a block as in-use.
2418 * This function should take any locks that would not normally be
2419 * necessary for the plain bitmap allocator, and then call the internal routine. Once
2420 * the on-disk data structures are updated properly, then this will remove the
2421 * appropriate node from the tree.
2422 */
2423
2424 static OSErr BlockMarkAllocatedRBTree(
2425 ExtendedVCB *vcb,
2426 u_int32_t startingBlock,
2427 u_int32_t numBlocks)
2428 {
2429 OSErr err;
2430 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2431 int rb_err = 0;
2432
2433
2434 if (ALLOC_DEBUG) {
2435 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
2436 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2437 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
2438 startingBlock, numBlocks);
2439 }
2440 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
2441 }
2442
2443 err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks);
2444
2445 if (err == noErr) {
2446
2447 if (ALLOC_DEBUG) {
2448 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2449 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n",
2450 startingBlock, numBlocks);
2451 }
2452 }
2453
2454 /*
2455 * Mark the blocks in the offset tree.
2456 */
2457 rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock);
2458 if (rb_err) {
2459 if (ALLOC_DEBUG) {
2460 printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err);
2461 }
2462
2463 /*
2464 * We may be called from the BlockMarkAllocated interface, in which case the caller
2465 * may not be picking extents from their start. Do a check here: find out if the
2466 * specified extent is free, and if it is, then find the containing node.
2467 */
2468 extent_node_t *node = NULL;
2469 extent_node_t search_sentinel;
2470 search_sentinel.offset = startingBlock;
2471
2472 node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
2473
2474 if (node) {
2475 rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock);
2476 }
2477
2478 if (ALLOC_DEBUG) {
2479 if (rb_err) {
2480 printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err);
2481 }
2482 }
2483 }
2484 if (ALLOC_DEBUG) {
2485 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
2486 }
2487 }
2488
2489 /*
2490 * If we encountered a red-black tree error, for now, we immediately back off and force
2491 * destruction of rb-tree. Set the persistent error-detected bit in the mount point.
2492 * That will ensure that even if we reach a low-water-mark in the future we will still
2493 * not allow the rb-tree to be used. On next mount, we will force a re-construction from
2494 * on-disk state. As a fallback, we will now resort to the bitmap-scanning behavior.
2495 */
2496 if (rb_err) {
2497 /* Mark RB-Trees with error */
2498 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
2499 DestroyTrees(hfsmp);
2500 /* Reset the Free Ext Cache since we'll be using it now. */
2501 ResetVCBFreeExtCache(hfsmp);
2502 printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n");
2503 }
2504
2505 return err;
2506 }
2507 #endif
2508
2509
2510
2511 /*
2512 * BlockMarkFree
2513 *
2514 * This is a wrapper function around the internal calls which will actually mark the blocks
2515 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
2516 * this logic here to avoid callers having to deal with whether or not the red-black tree
2517 * is enabled.
2518 *
2519 */
2520 OSErr BlockMarkFree(
2521 ExtendedVCB *vcb,
2522 u_int32_t startingBlock,
2523 register u_int32_t numBlocks)
2524 {
2525 struct hfsmount *hfsmp;
2526 hfsmp = VCBTOHFS(vcb);
2527 #if CONFIG_HFS_ALLOC_RBTREE
2528 if (hfs_isrbtree_active(hfsmp)) {
2529 int err;
2530
2531 if ((startingBlock >= hfsmp->offset_block_end) &&
2532 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
2533 /*
2534 * We're manipulating a portion of the bitmap that is not controlled by the
2535 * red-black tree. Just update the bitmap and don't bother manipulating the tree
2536 */
2537 goto justbitmap;
2538 }
2539
2540 err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks);
2541 if (err == noErr) {
2542 if ( ALLOC_DEBUG ) {
2543 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
2544 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2545 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n",
2546 startingBlock, numBlocks);
2547 }
2548 check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE);
2549 }
2550 }
2551 return err;
2552 }
2553 justbitmap:
2554 #endif
2555 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2556
2557 }
2558
2559
2560 /*
2561 * BlockMarkFreeUnused
2562 *
2563 * Scan the bitmap blocks beyond the end of the current file system for bits
2564 * that are marked as used. If any of the bits are marked as used,
2565 * this function marks them free.
2566 *
2567 * Note: This was specifically written to mark all bits beyond
2568 * end of current file system during hfs_extendfs(), which makes
2569 * sure that all the new blocks added to the file system are
2570 * marked as free. We expect that all the blocks beyond end of
2571 * current file system are always marked as free, but there might
2572 * be cases where they are marked as used. This function assumes that
2573 * the number of blocks incorrectly marked as used is relatively
2574 * small, otherwise this can overflow the journal transaction size
2575 * on certain file system configurations (example, large unused
2576 * bitmap with relatively small journal).
2577 *
2578 * Input:
2579 * startingBlock: First block of the range to mark unused
2580 * numBlocks: Number of blocks in the range to mark unused
2581 *
2582 * Returns: zero on success, non-zero on error.
2583 */
2584 OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
2585 {
2586 int error = 0;
2587 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2588 u_int32_t curNumBlocks;
2589 u_int32_t bitsPerBlock;
2590 u_int32_t lastBit;
2591
2592 /* Use the optimal bitmap I/O size instead of bitmap block size */
2593 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
2594
2595 /*
2596 * First clear any bits that are not aligned to a bitmap block boundary.
2597 *
2598 * Calculate the first bit in the bitmap block next to
2599 * the bitmap block containing the bit for startingBlock.
2600 * Using this value, we calculate the total number of
2601 * bits to be marked unused from startingBlock to the
2602 * end of bitmap block containing startingBlock.
2603 */
2604 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
2605 curNumBlocks = lastBit - startingBlock;
2606 if (curNumBlocks > numBlocks) {
2607 curNumBlocks = numBlocks;
2608 }
2609 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2610 if (error) {
2611 return error;
2612 }
2613 startingBlock += curNumBlocks;
2614 numBlocks -= curNumBlocks;
2615
2616 /*
2617 * Check a full bitmap block for any 'used' bit. If any bit is used,
2618 * mark all the bits only in that bitmap block as free. This ensures
2619 * that we do not write unmodified bitmap blocks and do not
2620 * overwhelm the journal.
2621 *
2622 * The code starts by checking one full bitmap block at a time, and
2623 * marks an entire bitmap block as free only if some bit in that bitmap
2624 * block is marked as used. In the end, it handles the last bitmap
2625 * block, which might be partially full, by only checking up to the
2626 * caller-specified last bit and, if any bit is set, only marking that
2627 * range as free.
2628 */
2629 while (numBlocks) {
2630 if (numBlocks >= bitsPerBlock) {
2631 curNumBlocks = bitsPerBlock;
2632 } else {
2633 curNumBlocks = numBlocks;
2634 }
2635 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
2636 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2637 if (error) {
2638 return error;
2639 }
2640 }
2641 startingBlock += curNumBlocks;
2642 numBlocks -= curNumBlocks;
2643 }
2644
2645 return error;
2646 }
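/*
 * A worked example of the alignment math above: with vcbVBMIOSize == 4096,
 * bitsPerBlock is 32768. For startingBlock == 100000:
 *
 *	lastBit      = ((100000 + 32767) / 32768) * 32768 = 4 * 32768 = 131072
 *	curNumBlocks = 131072 - 100000                    = 31072
 *
 * so the first (unaligned) call frees at most 31072 bits, leaving all of
 * the remaining work aligned to whole 32768-bit bitmap blocks.
 */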
2647
2648 /*
2649 _______________________________________________________________________
2650
2651 Routine: BlockMarkFreeInternal
2652
2653 Function: Mark a contiguous group of blocks as free (clear in the
2654 bitmap). It assumes those bits are currently marked
2655 allocated (set in the bitmap).
2656
2657 Inputs:
2658 vcb Pointer to volume where space is to be freed
2659 startingBlock First block number to mark as freed
2660 numBlocks Number of blocks to mark as freed
2661 do_validate If true, validate the blocks being deallocated:
2662 check that they are within totalBlocks for the
2663 current volume and that they were actually
2664 allocated before they are marked free.
2665 _______________________________________________________________________
2666 */
2667 static
2668 OSErr BlockMarkFreeInternal(
2669 ExtendedVCB *vcb,
2670 u_int32_t startingBlock_in,
2671 register u_int32_t numBlocks_in,
2672 Boolean do_validate)
2673 {
2674 OSErr err;
2675 u_int32_t startingBlock = startingBlock_in;
2676 u_int32_t numBlocks = numBlocks_in;
2677 uint32_t unmapStart = startingBlock_in;
2678 uint32_t unmapCount = numBlocks_in;
2679 uint32_t wordIndexInBlock;
2680 u_int32_t *currentWord; // Pointer to current word within bitmap block
2681 u_int32_t wordsLeft; // Number of words left in this bitmap block
2682 u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2683 u_int32_t currentBit; // Bit index within word of current bit to allocate
2684 u_int32_t numBits; // Number of bits in word to allocate
2685 u_int32_t *buffer = NULL;
2686 uintptr_t blockRef;
2687 u_int32_t bitsPerBlock;
2688 u_int32_t wordsPerBlock;
2689 // XXXdbg
2690 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2691
2692 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2693 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
2694
2695 /*
2696 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
2697 * need to be able to free blocks being relocated during hfs_truncatefs.
2698 */
2699 if ((do_validate == true) &&
2700 (startingBlock + numBlocks > vcb->totalBlocks)) {
2701 if (ALLOC_DEBUG) {
2702 panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
2703 }
2704
2705 printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
2706 hfs_mark_volume_inconsistent(vcb);
2707 err = EIO;
2708 goto Exit;
2709 }
2710
2711 //
2712 // Pre-read the bitmap block containing the first word of allocation
2713 //
2714
2715 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2716 if (err != noErr) goto Exit;
2717 // XXXdbg
2718 if (hfsmp->jnl) {
2719 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2720 }
2721
2722 //
2723 // Figure out how many bits and words per bitmap block.
2724 //
2725 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2726 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2727 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2728
2729 //
2730 // Look for a range of free blocks immediately before startingBlock
2731 // (up to the start of the current bitmap block). Set unmapStart to
2732 // the first free block.
2733 //
2734 currentWord = buffer + wordIndexInBlock;
2735 currentBit = startingBlock % kBitsPerWord;
2736 bitMask = kHighBitInWordMask >> currentBit;
2737 while (true) {
2738 // Move currentWord/bitMask back by one bit
2739 bitMask <<= 1;
2740 if (bitMask == 0) {
2741 if (--currentWord < buffer)
2742 break;
2743 bitMask = kLowBitInWordMask;
2744 }
2745
2746 if (*currentWord & SWAP_BE32(bitMask))
2747 break; // Found an allocated block. Stop searching.
2748 --unmapStart;
2749 ++unmapCount;
2750 }
2751
2752 //
2753 // If the first block to free doesn't start on a word
2754 // boundary in the bitmap, then treat that first word
2755 // specially.
2756 //
2757
2758 currentWord = buffer + wordIndexInBlock;
2759 wordsLeft = wordsPerBlock - wordIndexInBlock;
2760 currentBit = startingBlock % kBitsPerWord;
2761 if (currentBit != 0) {
2762 bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit
2763 numBits = kBitsPerWord - currentBit; // number of remaining bits in this word
2764 if (numBits > numBlocks) {
2765 numBits = numBlocks; // entire allocation is inside this one word
2766 bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last
2767 }
2768 if ((do_validate == true) &&
2769 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2770 goto Corruption;
2771 }
2772 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2773 numBlocks -= numBits; // adjust number of blocks left to free
2774
2775 ++currentWord; // move to next word
2776 --wordsLeft; // one less word left in this block
2777 }
2778
2779 //
2780 // Free whole words (32 blocks) at a time.
2781 //
2782
2783 while (numBlocks >= kBitsPerWord) {
2784 if (wordsLeft == 0) {
2785 // Read in the next bitmap block
2786 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2787
2788 buffer = NULL;
2789 err = ReleaseBitmapBlock(vcb, blockRef, true);
2790 if (err != noErr) goto Exit;
2791
2792 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2793 if (err != noErr) goto Exit;
2794
2795 // XXXdbg
2796 if (hfsmp->jnl) {
2797 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2798 }
2799
2800 // Readjust currentWord and wordsLeft
2801 currentWord = buffer;
2802 wordsLeft = wordsPerBlock;
2803 }
2804 if ((do_validate == true) &&
2805 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
2806 goto Corruption;
2807 }
2808 *currentWord = 0; // clear the entire word
2809 numBlocks -= kBitsPerWord;
2810
2811 ++currentWord; // move to next word
2812 --wordsLeft; // one less word left in this block
2813 }
2814
2815 //
2816 // Free any remaining blocks.
2817 //
2818
2819 if (numBlocks != 0) {
2820 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2821 if (wordsLeft == 0) {
2822 // Read in the next bitmap block
2823 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2824
2825 buffer = NULL;
2826 err = ReleaseBitmapBlock(vcb, blockRef, true);
2827 if (err != noErr) goto Exit;
2828
2829 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2830 if (err != noErr) goto Exit;
2831
2832 // XXXdbg
2833 if (hfsmp->jnl) {
2834 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2835 }
2836
2837 // Readjust currentWord and wordsLeft
2838 currentWord = buffer;
2839 wordsLeft = wordsPerBlock;
2840 }
2841 if ((do_validate == true) &&
2842 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2843 goto Corruption;
2844 }
2845 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2846
2847 // No need to update currentWord or wordsLeft
2848 }
2849
2850 //
2851 // Look for a range of free blocks immediately after the range we just freed
2852 // (up to the end of the current bitmap block).
2853 //
2854 wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
2855 wordsLeft = wordsPerBlock - wordIndexInBlock;
2856 currentWord = buffer + wordIndexInBlock;
2857 currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
2858 bitMask = kHighBitInWordMask >> currentBit;
2859 while (true) {
2860 // Move currentWord/bitMask/wordsLeft forward one bit
2861 bitMask >>= 1;
2862 if (bitMask == 0) {
2863 if (--wordsLeft == 0)
2864 break;
2865 ++currentWord;
2866 bitMask = kHighBitInWordMask;
2867 }
2868
2869 if (*currentWord & SWAP_BE32(bitMask))
2870 break; // Found an allocated block. Stop searching.
2871 ++unmapCount;
2872 }
2873
2874 Exit:
2875
2876 if (buffer)
2877 (void)ReleaseBitmapBlock(vcb, blockRef, true);
2878
2879 if (err == noErr) {
2880 hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
2881 }
2882
2883 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2884 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2885
2886 return err;
2887
2888 Corruption:
2889 #if DEBUG_BUILD
2890 panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
2891 #else
2892 printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
2893 hfs_mark_volume_inconsistent(vcb);
2894 err = EIO;
2895 goto Exit;
2896 #endif
2897 }
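/*
 * Note on unmapStart/unmapCount above: before issuing the TRIM, the routine
 * widens the freed range over any blocks that were already free on either
 * side (within the same bitmap block). For example, freeing blocks 100..109
 * when 96..99 and 110..120 are already clear yields an unmap extent of
 * 96..120, so the device sees one contiguous DKIOCUNMAP rather than one
 * bounded by the caller's exact range.
 */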
2898
2899
2900 #if CONFIG_HFS_ALLOC_RBTREE
2901 /*
2902 * This is a wrapper function around BlockMarkFreeInternal. This function is
2903 * called when the RB Tree-based allocator needs to mark a block as no longer
2904 * in use. This function should take any locks that would not normally be
2905 * necessary for the plain bitmap deallocator, and then call the internal routine. Once
2906 * the on-disk data structures are updated properly, then this will update an
2907 * existing rb-tree node if possible, or else create a new one.
2908 */
2909
2910 OSErr BlockMarkFreeRBTree(
2911 ExtendedVCB *vcb,
2912 u_int32_t startingBlock,
2913 register u_int32_t numBlocks)
2914 {
2915 OSErr err;
2916 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2917 int rb_err = 0;
2918
2919 if (ALLOC_DEBUG) {
2920 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
2921 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2922 panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n",
2923 startingBlock, numBlocks);
2924 }
2925 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
2926 }
2927
2928 err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2929
2930 if (err == noErr) {
2931
2932 /*
2933 * During a filesystem truncation, we may need to relocate files out of the
2934 * portion of the bitmap that is no longer controlled by the r/b tree.
2935 * In this case, just update the bitmap and do not attempt to manipulate the tree.
2936 */
2937 if ((startingBlock >= hfsmp->offset_block_end) &&
2938 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
2939 goto free_error;
2940 }
2941
2942 extent_node_t *newnode;
2943
2944 if (ALLOC_DEBUG) {
2945 /*
2946 * Validate that the blocks in question are not allocated in the bitmap, and that they're
2947 * not in the offset tree, since it should be tracking free extents, rather than allocated
2948 * extents.
2949 */
2950 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2951 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n",
2952 startingBlock, numBlocks);
2953 }
2954 }
2955
2956 if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) {
2957 if (startingBlock >= hfsmp->offset_block_end) {
2958 /*
2959 * If the tree generation code has not yet finished scanning the
2960 * bitmap region containing this extent, do nothing. If the start
2961 * of the range to be deallocated is greater than the current high
2962 * watermark on the offset tree, just bail out and let the scanner catch up with us.
2963 */
2964 rb_err = 0;
2965 goto free_error;
2966 }
2967 }
2968
2969 newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock);
2970 if (newnode == NULL) {
2971 rb_err = 1;
2972 goto free_error;
2973 }
2974
2975 if (ALLOC_DEBUG) {
2976 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
2977 }
2978
2979 }
2980
2981 free_error:
2982 /*
2983 * We follow the same principle as in BlockMarkAllocatedRBTree.
2984 * If we encounter an error in adding the extents to the rb-tree, then immediately
2985 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags
2986 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild.
2987 */
2988 if (rb_err) {
2989 /* Mark RB-Trees with error */
2990 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
2991 DestroyTrees(hfsmp);
2992 /* Reset the Free Ext Cache since we'll be using it now. */
2993 ResetVCBFreeExtCache(hfsmp);
2994 printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n");
2995 }
2996
2997
2998 return err;
2999
3000 }
3001 #endif
3002
3003 /*
3004 _______________________________________________________________________
3005
3006 Routine: BlockFindContiguous
3007
3008 Function: Find a contiguous range of blocks that are free (bits
3009 clear in the bitmap). If a contiguous range of the
3010 minimum size can't be found, an error will be returned.
3011 This is only needed to support the bitmap-scanning logic,
3012 as the red-black tree should be able to do this by internally
3013 searching its tree.
3014
3015 Inputs:
3016 vcb Pointer to volume where space is to be allocated
3017 startingBlock Preferred first block of range
3018 endingBlock Last possible block in range + 1
3019 minBlocks Minimum number of blocks needed. Must be > 0.
3020 maxBlocks Maximum (ideal) number of blocks desired
3021 useMetaZone OK to dip into metadata allocation zone
3022
3023 Outputs:
3024 actualStartBlock First block of range found, or 0 if error
3025 actualNumBlocks Number of blocks found, or 0 if error
3026
3027 Returns:
3028 noErr Found at least minBlocks contiguous
3029 dskFulErr No contiguous space found, or all less than minBlocks
3030 _______________________________________________________________________
3031 */
3032
3033 static OSErr BlockFindContiguous(
3034 ExtendedVCB *vcb,
3035 u_int32_t startingBlock,
3036 u_int32_t endingBlock,
3037 u_int32_t minBlocks,
3038 u_int32_t maxBlocks,
3039 Boolean useMetaZone,
3040 u_int32_t *actualStartBlock,
3041 u_int32_t *actualNumBlocks)
3042 {
3043 OSErr err;
3044 register u_int32_t currentBlock; // Block we're currently looking at.
3045 u_int32_t firstBlock; // First free block in current extent.
3046 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
3047 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
3048 u_int32_t *buffer = NULL;
3049 register u_int32_t *currentWord;
3050 register u_int32_t bitMask;
3051 register u_int32_t wordsLeft;
3052 register u_int32_t tempWord;
3053 uintptr_t blockRef;
3054 u_int32_t wordsPerBlock;
3055 u_int32_t updated_free_extent = 0;
3056
3057 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3058 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
3059
3060 /*
3061 * When we're skipping the metadata zone and the start/end
3062 * range overlaps with the metadata zone then adjust the
3063 * start to be outside of the metadata zone. If the range
3064 * is entirely inside the metadata zone then we can deny the
3065 * request (dskFulErr).
3066 */
3067 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
3068 if (startingBlock <= vcb->hfs_metazone_end) {
3069 if (endingBlock > (vcb->hfs_metazone_end + 2))
3070 startingBlock = vcb->hfs_metazone_end + 1;
3071 else
3072 goto DiskFull;
3073 }
3074 }
3075
3076 if ((endingBlock - startingBlock) < minBlocks)
3077 {
3078 // The set of blocks we're checking is smaller than the minimum number
3079 // of blocks, so we couldn't possibly find a good range.
3080 goto DiskFull;
3081 }
3082
3083 stopBlock = endingBlock - minBlocks + 1;
3084 currentBlock = startingBlock;
3085 firstBlock = 0;
3086
3087 /*
3088 * Skip over metadata blocks.
3089 */
3090 if (!useMetaZone)
3091 currentBlock = NextBitmapBlock(vcb, currentBlock);
3092
3093 //
3094 // Pre-read the first bitmap block.
3095 //
3096 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
3097 if ( err != noErr ) goto ErrorExit;
3098
3099 //
3100 // Figure out where currentBlock is within the buffer.
3101 //
3102 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
3103
3104 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
3105 currentWord = buffer + wordsLeft;
3106 wordsLeft = wordsPerBlock - wordsLeft;
3107
3108 do
3109 {
3110 foundBlocks = 0;
3111
3112 //============================================================
3113 // Look for a free block, skipping over allocated blocks.
3114 //============================================================
3115
3116 //
3117 // Check an initial partial word (if any)
3118 //
3119 bitMask = currentBlock & kBitsWithinWordMask;
3120 if (bitMask)
3121 {
3122 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3123 bitMask = kHighBitInWordMask >> bitMask;
3124 while (tempWord & bitMask)
3125 {
3126 bitMask >>= 1;
3127 ++currentBlock;
3128 }
3129
3130 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
3131 if (bitMask)
3132 goto FoundUnused;
3133
3134 // Didn't find any unused bits, so we're done with this word.
3135 ++currentWord;
3136 --wordsLeft;
3137 }
3138
3139 //
3140 // Check whole words
3141 //
3142 while (currentBlock < stopBlock)
3143 {
3144 // See if it's time to read another block.
3145 if (wordsLeft == 0)
3146 {
3147 buffer = NULL;
3148 err = ReleaseBitmapBlock(vcb, blockRef, false);
3149 if (err != noErr) goto ErrorExit;
3150
3151 /*
3152 * Skip over metadata blocks.
3153 */
3154 if (!useMetaZone) {
3155 currentBlock = NextBitmapBlock(vcb, currentBlock);
3156 if (currentBlock >= stopBlock) {
3157 goto LoopExit;
3158 }
3159 }
3160
3161 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
3162 if ( err != noErr ) goto ErrorExit;
3163
3164 currentWord = buffer;
3165 wordsLeft = wordsPerBlock;
3166 }
3167
3168 // See if any of the bits are clear
3169 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
3170 {
3171 // Figure out which bit is clear
3172 bitMask = kHighBitInWordMask;
3173 while (tempWord & bitMask)
3174 {
3175 bitMask >>= 1;
3176 ++currentBlock;
3177 }
3178
3179 break; // Found the free bit; break out to FoundUnused.
3180 }
3181
3182 // Keep looking at the next word
3183 currentBlock += kBitsPerWord;
3184 ++currentWord;
3185 --wordsLeft;
3186 }
3187
3188 FoundUnused:
3189 // Make sure the unused bit is early enough to use
3190 if (currentBlock >= stopBlock)
3191 {
3192 break;
3193 }
3194
3195 // Remember the start of the extent
3196 firstBlock = currentBlock;
3197
3198 //============================================================
3199 // Count the number of contiguous free blocks.
3200 //============================================================
3201
3202 //
3203 // Check an initial partial word (if any)
3204 //
3205 bitMask = currentBlock & kBitsWithinWordMask;
3206 if (bitMask)
3207 {
3208 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3209 bitMask = kHighBitInWordMask >> bitMask;
3210 while (bitMask && !(tempWord & bitMask))
3211 {
3212 bitMask >>= 1;
3213 ++currentBlock;
3214 }
3215
3216 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3217 if (bitMask)
3218 goto FoundUsed;
3219
3220 // Didn't find any used bits, so we're done with this word.
3221 ++currentWord;
3222 --wordsLeft;
3223 }
3224
3225 //
3226 // Check whole words
3227 //
3228 while (currentBlock < endingBlock)
3229 {
3230 // See if it's time to read another block.
3231 if (wordsLeft == 0)
3232 {
3233 buffer = NULL;
3234 err = ReleaseBitmapBlock(vcb, blockRef, false);
3235 if (err != noErr) goto ErrorExit;
3236
3237 /*
3238 * Skip over metadata blocks.
3239 */
3240 if (!useMetaZone) {
3241 u_int32_t nextBlock;
3242
3243 nextBlock = NextBitmapBlock(vcb, currentBlock);
3244 if (nextBlock != currentBlock) {
3245 goto LoopExit; /* allocation gap, so stop */
3246 }
3247 }
3248
3249 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
3250 if ( err != noErr ) goto ErrorExit;
3251
3252 currentWord = buffer;
3253 wordsLeft = wordsPerBlock;
3254 }
3255
3256 // See if any of the bits are set
3257 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
3258 {
3259 // Figure out which bit is set
3260 bitMask = kHighBitInWordMask;
3261 while (!(tempWord & bitMask))
3262 {
3263 bitMask >>= 1;
3264 ++currentBlock;
3265 }
3266
3267 break; // Found the used bit; break out to FoundUsed.
3268 }
3269
3270 // Keep looking at the next word
3271 currentBlock += kBitsPerWord;
3272 ++currentWord;
3273 --wordsLeft;
3274
3275 // If we found at least maxBlocks, we can quit early.
3276 if ((currentBlock - firstBlock) >= maxBlocks)
3277 break;
3278 }
3279
3280 FoundUsed:
3281 // Make sure we didn't run out of bitmap looking for a used block.
3282 // If so, pin to the end of the bitmap.
3283 if (currentBlock > endingBlock)
3284 currentBlock = endingBlock;
3285
3286 // Figure out how many contiguous free blocks there were.
3287 // Pin the answer to maxBlocks.
3288 foundBlocks = currentBlock - firstBlock;
3289 if (foundBlocks > maxBlocks)
3290 foundBlocks = maxBlocks;
3291 if (foundBlocks >= minBlocks)
3292 break; // Found what we needed!
3293
3294 /* We did not find the total number of blocks we were looking for, but
3295 * let's add this free block run to our free extent cache list
3296 */
3297 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3298
3299 } while (currentBlock < stopBlock);
3300 LoopExit:
3301
3302 // Return the outputs.
3303 if (foundBlocks < minBlocks)
3304 {
3305 DiskFull:
3306 err = dskFulErr;
3307 ErrorExit:
3308 *actualStartBlock = 0;
3309 *actualNumBlocks = 0;
3310 }
3311 else
3312 {
3313 err = noErr;
3314 *actualStartBlock = firstBlock;
3315 *actualNumBlocks = foundBlocks;
3316 /*
3317 * Sanity check for overflow
3318 */
3319 if ((firstBlock + foundBlocks) > vcb->allocLimit) {
3320 panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
3321 vcb->vcbVN, startingBlock, endingBlock, currentBlock,
3322 firstBlock, stopBlock, minBlocks, foundBlocks);
3323 }
3324 }
3325
3326 if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
3327 int i;
3328 u_int32_t min_start = vcb->totalBlocks;
3329
3330 // set the nextAllocation pointer to the smallest free block number
3331 // we've seen so on the next mount we won't rescan unnecessarily
3332 lck_spin_lock(&vcb->vcbFreeExtLock);
3333 for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
3334 if (vcb->vcbFreeExt[i].startBlock < min_start) {
3335 min_start = vcb->vcbFreeExt[i].startBlock;
3336 }
3337 }
3338 lck_spin_unlock(&vcb->vcbFreeExtLock);
3339 if (min_start != vcb->totalBlocks) {
3340 if (min_start < vcb->nextAllocation) {
3341 vcb->nextAllocation = min_start;
3342 }
3343 if (min_start < vcb->sparseAllocation) {
3344 vcb->sparseAllocation = min_start;
3345 }
3346 }
3347 }
3348
3349 if (buffer)
3350 (void) ReleaseBitmapBlock(vcb, blockRef, false);
3351
3352 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3353 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
3354
3355 return err;
3356 }
3357
3358
3359 #if CONFIG_HFS_ALLOC_RBTREE
3360 /*
3361 * Wrapper function around hfs_isrbtree_allocated. This just takes the start offset,
3362 * and the number of blocks, and whether we expect the blocks to be
3363 * free or allocated. This function is designed to be used primarily with the debug #ifdef
3364 * enabled, so it results in a panic if anything unexpected occurs.
3365 *
3366 * shouldBeFree will be nonzero if the caller expects the zone to be free.
3367 */
3368
3369 void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks,
3370 u_int32_t numBlocks, int shouldBeFree) {
3371 int alloc;
3372 extent_node_t *node1 = NULL;
3373 u_int32_t off1 = 0;
3374 u_int32_t len1 = 0;
3375 alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1);
3376
3377 if (node1) {
3378 off1 = node1->offset;
3379 len1 = node1->length;
3380 }
3381
3382 if (shouldBeFree) {
3383 /*
3384 * If the region should be free, then we expect to see extents in the tree
3385 * matching this start and length. Alloc != 0 means some portion of the extent
3386 * specified was allocated.
3387 */
3388 if (alloc != 0){
3389 panic ("HFS check_rbtree_extents: Node (%p) do not exist! "
3390 "node1 off (%d),len(%d),, start(%d) end(%d)\n",
3391 node1, off1, len1, startBlocks, numBlocks);
3392 }
3393 }
3394 else {
3395 /*
3396 * Otherwise, this means that the region should be allocated, and if we find
3397 * an extent matching it, that's bad.
3398 */
3399 if (alloc == 0){
3400 panic ("HFS check_rbtree_extents: Node (%p) exists! "
3401 "node1 off (%d),len(%d), start(%d) end(%d)\n",
3402 node1, off1, len1, startBlocks, numBlocks);
3403 }
3404 }
3405 }
3406 #endif
3407
3408 #if CONFIG_HFS_ALLOC_RBTREE
3409 /*
3410 * Exhaustive validation search. This function iterates over all allocation blocks and
3411 * compares their status in the red-black tree vs. the allocation bitmap. If the two are out of sync
3412 * then it will panic. Bitmap lock must be held while this function is run.
3413 *
3414 * Because this function requires a red-black tree search to validate every allocation block, it is
3415 * very expensive and should ONLY be run in debug mode, and even then, infrequently.
3416 *
3417 * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
3418 *
3419 */
3420 void
3421 hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){
3422
3423 u_int32_t current;
3424 extent_node_t* node1;
3425
3426 hfs_checktreelinks (hfsmp);
3427
3428 for (current = start; current < end; current++) {
3429 node1 = NULL;
3430 int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
3431 int bitmap = hfs_isallocated(hfsmp, current, 1);
3432
3433 if (bitmap != rbtree){
3434 panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n",
3435 current, bitmap, rbtree);
3436 }
3437 }
3438 }
3439
3440 /*
3441 * Exhaustive Red-Black Tree Linked List verification routine.
3442 *
3443 * This function iterates through the red-black tree's nodes, and then verifies that the linked list
3444 * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
3445 * The bitmap lock must be held while this function is run.
3446 */
3447
3448 void
3449 hfs_checktreelinks (struct hfsmount *hfsmp) {
3450 extent_tree_offset_t *tree = &hfsmp->offset_tree;
3451
3452 extent_node_t *current = NULL;
3453 extent_node_t *next = NULL;
3454 extent_node_t *treenext;
3455
3456 current = extent_tree_off_first (tree);
3457
3458 while (current) {
3459 next = current->offset_next;
3460 treenext = extent_tree_off_next (tree, current);
3461 if (next != treenext) {
3462 panic("hfs_checktreelinks: mismatch for node (%p), next: %p , treenext %p !\n", current, next, treenext);
3463 }
3464 current = treenext;
3465 }
3466 }
3467
3468 #endif
3469
3470
3471 #if CONFIG_HFS_ALLOC_RBTREE
3472 /*
3473 * Test to see if any free blocks exist at a given offset.
3474 * If a free-extent node exists at or before the specified offset, it is
3475 * returned through 'ret_node'.
3476 *
3477 * A NULL 'ret_node' indicates that allocated blocks exist at that offset.
3478 *
3479 * Allocation file lock must be held.
3480 *
3481 * Returns:
3482 * 1 if blocks in the range are allocated.
3483 * 0 if all blocks in the range are free.
3484 */
3485
3486 static int
3487 hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock,
3488 u_int32_t numBlocks, extent_node_t **ret_node) {
3489
3490 extent_node_t search_sentinel;
3491 extent_node_t *node = NULL;
3492 extent_node_t *nextnode = NULL;
3493
3494 /*
3495 * With only one tree, then we just have to validate that there are entries
3496 * in the R/B tree at the specified offset if it really is free.
3497 */
3498 search_sentinel.offset = startBlock;
3499 search_sentinel.length = numBlocks;
3500
3501 node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
3502 if (node) {
3503
3504 *ret_node = node;
3505 nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
3506 if (nextnode != node->offset_next) {
3507 panic ("hfs_rbtree_isallocated: Next pointers out of sync!\n");
3508 }
3509
3510 /*
3511 * Check to see if it is a superset of our target range. Because we started
3512 * with the offset or some offset prior to it, then we know the node's offset is
3513 * at least <= startBlock. So, if the end of the node is greater than the end of
3514 * our target range, then the whole range is free.
3515 */
3516
3517 if ((node->offset + node->length) >= (startBlock + numBlocks)) {
3518 if (node->offset > startBlock) {
3519 panic ("hfs_rbtree_isallocated: bad node ordering!");
3520 }
3521 return 0;
3522 }
3523 }
3524 /*
3525 * We got here if either our node search resulted in a node whose extent
3526 * was strictly before our target offset, or we couldn't find a previous node
3527 * at all (the beginning of the volume). If the former, then we can infer that
3528 * at least one block in the target range is allocated since the next node's offset
3529 * must be greater than startBlock.
3530 *
3531 * Either way, this means that the target range is unavailable to allocate, so
3532 * just return 1.
3533 */
3534 return 1;
3535 }
3536
3537
3538 #endif
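
/*
 * A worked example of the predecessor search above (values hypothetical):
 * suppose the tree holds free extents (offset 10, length 20) and (offset 50,
 * length 5). A query for startBlock 12, numBlocks 4 finds node (10, 20);
 * since 10 + 20 >= 12 + 4, the whole range is free and we return 0. A query
 * for startBlock 28, numBlocks 4 also finds (10, 20), but 10 + 20 < 28 + 4,
 * so at least one block in the range is allocated and we return 1.
 */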
3539
3540 /*
3541 * Count number of bits set in the given 32-bit unsigned number
3542 *
3543 * Returns:
3544 * Number of bits set
3545 */
3546 static int num_bits_set(u_int32_t num)
3547 {
3548 int count;
3549
3550 for (count = 0; num; count++) {
3551 num &= num - 1;
3552 }
3553
3554 return count;
3555 }
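
/*
 * The loop above is the classic Kernighan bit count: num &= (num - 1) clears
 * the lowest set bit on each pass, so the body runs exactly once per set bit.
 * A minimal user-space sketch of the same routine (illustrative only, not
 * part of the kernel build):
 */
#if 0
#include <assert.h>
#include <stdint.h>

static int popcount32(uint32_t num)
{
	int count;

	for (count = 0; num; count++) {
		num &= num - 1;		/* clear the lowest set bit */
	}
	return count;
}

int main(void)
{
	assert(popcount32(0x00000000) == 0);
	assert(popcount32(0x80000001) == 2);	/* highest and lowest bits */
	assert(popcount32(0xFFFFFFFF) == 32);	/* all bits set */
	return 0;
}
#endif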
3556
3557 /*
3558 * For a given range of blocks, find the total number of blocks
3559 * allocated. If 'stop_on_first' is true, it stops as soon as it
3560 * encounters the first allocated block. This option is useful
3561 * to determine if any block is allocated or not.
3562 *
3563 * Inputs:
3564 * startingBlock First allocation block number of the range to be scanned.
3565 * numBlocks Total number of blocks that need to be scanned.
3566 * stop_on_first Stop the search after the first allocated block is found.
3567 *
3568 * Output:
3569 * allocCount Total number of allocation blocks allocated in the given range.
3570 *
3571 * On error, allocCount is the number of allocated blocks found
3572 * before the error was encountered.
3573 *
3574 * If 'stop_on_first' is set,
3575 * allocCount = 1 if any allocated block was found.
3576 * allocCount = 0 if no allocated block was found.
3577 *
3578 * Returns:
3579 * 0 on success, non-zero on failure.
3580 */
3581 static int
3582 hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
3583 u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
3584 {
3585 u_int32_t *currentWord; // Pointer to current word within bitmap block
3586 u_int32_t wordsLeft; // Number of words left in this bitmap block
3587 u_int32_t bitMask; // Word with given bits already set (ready to test)
3588 u_int32_t firstBit; // Bit index within word of first bit to allocate
3589 u_int32_t numBits; // Number of bits in word to allocate
3590 u_int32_t *buffer = NULL;
3591 uintptr_t blockRef;
3592 u_int32_t bitsPerBlock;
3593 u_int32_t wordsPerBlock;
3594 u_int32_t blockCount = 0;
3595 int error;
3596
3597 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3598 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
3599
3600 /*
3601 * Pre-read the bitmap block containing the first word of allocation
3602 */
3603 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3604 if (error)
3605 goto JustReturn;
3606
3607 /*
3608 * Initialize currentWord, and wordsLeft.
3609 */
3610 {
3611 u_int32_t wordIndexInBlock;
3612
3613 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3614 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
3615
3616 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3617 currentWord = buffer + wordIndexInBlock;
3618 wordsLeft = wordsPerBlock - wordIndexInBlock;
3619 }
3620
3621 /*
3622 * First test any non word aligned bits.
3623 */
3624 firstBit = startingBlock % kBitsPerWord;
3625 if (firstBit != 0) {
3626 bitMask = kAllBitsSetInWord >> firstBit;
3627 numBits = kBitsPerWord - firstBit;
3628 if (numBits > numBlocks) {
3629 numBits = numBlocks;
3630 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
3631 }
3632 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3633 if (stop_on_first) {
3634 blockCount = 1;
3635 goto Exit;
3636 }
3637 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3638 }
3639 numBlocks -= numBits;
3640 ++currentWord;
3641 --wordsLeft;
3642 }
3643
3644 /*
3645 * Test whole words (32 blocks) at a time.
3646 */
3647 while (numBlocks >= kBitsPerWord) {
3648 if (wordsLeft == 0) {
3649 /* Read in the next bitmap block. */
3650 startingBlock += bitsPerBlock;
3651
3652 buffer = NULL;
3653 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3654 if (error) goto Exit;
3655
3656 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3657 if (error) goto Exit;
3658
3659 /* Readjust currentWord and wordsLeft. */
3660 currentWord = buffer;
3661 wordsLeft = wordsPerBlock;
3662 }
3663 if (*currentWord != 0) {
3664 if (stop_on_first) {
3665 blockCount = 1;
3666 goto Exit;
3667 }
3668 blockCount += num_bits_set(*currentWord);
3669 }
3670 numBlocks -= kBitsPerWord;
3671 ++currentWord;
3672 --wordsLeft;
3673 }
3674
3675 /*
3676 * Test any remaining blocks.
3677 */
3678 if (numBlocks != 0) {
3679 bitMask = ~(kAllBitsSetInWord >> numBlocks);
3680 if (wordsLeft == 0) {
3681 /* Read in the next bitmap block */
3682 startingBlock += bitsPerBlock;
3683
3684 buffer = NULL;
3685 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3686 if (error) goto Exit;
3687
3688 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3689 if (error) goto Exit;
3690
3691 currentWord = buffer;
3692 wordsLeft = wordsPerBlock;
3693 }
3694 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3695 if (stop_on_first) {
3696 blockCount = 1;
3697 goto Exit;
3698 }
3699 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3700 }
3701 }
3702 Exit:
3703 if (buffer) {
3704 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
3705 }
3706 if (allocCount) {
3707 *allocCount = blockCount;
3708 }
3709
3710 JustReturn:
3711 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3712 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
3713
3714 return (error);
3715 }
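
/*
 * A worked example of the unaligned-prefix math above: for startingBlock 35,
 * firstBit = 35 % 32 = 3, so bitMask = kAllBitsSetInWord >> 3 = 0x1FFFFFFF,
 * covering bits 3..31 of the first word (29 blocks). If numBlocks were only
 * 4, the mask is further trimmed to bits 3..6:
 * 0x1FFFFFFF & ~(kAllBitsSetInWord >> 7) == 0x1E000000.
 */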
3716
3717 /*
3718 * Count total number of blocks that are allocated in the given
3719 * range from the bitmap. This is used to preflight total blocks
3720 * that need to be relocated during volume resize.
3721 *
3722 * The journal or allocation file lock must be held.
3723 *
3724 * Returns:
3725 * 0 on success, non-zero on failure.
3726 * On failure, allocCount is zero.
3727 */
3728 int
3729 hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
3730 u_int32_t numBlocks, u_int32_t *allocCount)
3731 {
3732 return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
3733 }
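
/*
 * Hypothetical preflight for a volume shrink (sketch only; the function and
 * variable names below are assumptions, not taken from the resize code):
 * count how many blocks in the doomed region are still allocated and would
 * therefore need to be relocated before the bitmap can be truncated.
 */
#if 0
static int preflight_shrink(struct hfsmount *hfsmp,
		u_int32_t oldBlockCount, u_int32_t newBlockCount)
{
	u_int32_t reclaimBlocks = 0;
	int err;

	/* Blocks [newBlockCount, oldBlockCount) are about to disappear. */
	err = hfs_count_allocated(hfsmp, newBlockCount,
			oldBlockCount - newBlockCount, &reclaimBlocks);
	if (err == 0 && reclaimBlocks != 0) {
		/* ... relocate 'reclaimBlocks' worth of extents first ... */
	}
	return err;
}
#endif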
3734
3735 /*
3736 * Test to see if any blocks in a range are allocated.
3737 *
3738 * Note: On error, this function returns 1, which means that
3739 * one or more blocks in the range are allocated. This function
3740 * is primarily used for volume resize and we do not want
3741 * to report to the caller that the blocks are free when we
3742 * were not able to determine that deterministically. So on error,
3743 * we always report that the blocks are allocated.
3744 *
3745 * The journal or allocation file lock must be held.
3746 *
3747 * Returns
3748 * 0 if all blocks in the range are free.
3749 * 1 if blocks in the range are allocated, or there was an error.
3750 */
3751 int
3752 hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
3753 {
3754 int error;
3755 u_int32_t allocCount;
3756
3757 error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
3758 if (error) {
3759 /* On error, we always say that the blocks are allocated
3760 * so that volume resize does not return false success.
3761 */
3762 return 1;
3763 } else {
3764 /* The function was deterministically able to find out
3765 * if there was any block allocated or not. In that case,
3766 * the value in allocCount is good enough to be returned
3767 * back to the caller.
3768 */
3769 return allocCount;
3770 }
3771 }
3772
3773 /*
3774 * Check to see if the red-black tree is live. Allocation file lock must be held
3775 * shared or exclusive to call this function. Note that we may call this even if
3776 * HFS is built without activating the red-black tree code.
3777 */
3778 __private_extern__
3779 int
3780 hfs_isrbtree_active(struct hfsmount *hfsmp){
3781
3782 //TODO: Update this function to deal with a truncate/resize coming in when the tree
3783 //isn't fully finished. Maybe we need to check the flags for something other than ENABLED?
3784
3785 #if CONFIG_HFS_ALLOC_RBTREE
3786 if (ALLOC_DEBUG) {
3787 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
3788 }
3789 if (hfsmp){
3790
3791 if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
3792 return 1;
3793 }
3794 }
3795 #else
3796 #pragma unused (hfsmp)
3797 #endif
3798 /* If the RB Tree code is not enabled, then just always return 0 */
3799 return 0;
3800 }
3801
3802
3803 /*
3804 * This function scans the specified bitmap block and acts on it as necessary.
3805 * We may add it to the list of blocks to be UNMAP/TRIM'd or add it to allocator
3806 * data structures. This function is not #if'd to the CONFIG_RB case because
3807 * we want to use it unilaterally at mount time if on a UNMAP-capable device.
3808 *
3809 * Additionally, we may want an allocating thread to invoke this if the tree
3810 * does not have enough extents to satisfy an allocation request.
3811 *
3812 * startbit - the allocation block (bit) where we need to start our scan. For
3813 * instance, we may need to start the normal allocation scan
3814 * in the middle of an existing allocation block.
3815 * endBit - the allocation block where we should end this search (inclusive).
3816 * bitToScan - output argument for this function to specify the next bit to scan.
3817 * list - trim list to which any free extents discovered during the scan are added.
3818 * Returns:
3819 * 0 on success
3820 * nonzero on failure.
3821 */
3822
3823 static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
3824 u_int32_t endBit, u_int32_t *bitToScan,
3825 struct jnl_trim_list *list) {
3826
3827 int error;
3828 u_int32_t curAllocBlock;
3829 struct buf *blockRef = NULL;
3830 u_int32_t *buffer = NULL;
3831 u_int32_t wordIndexInBlock;
3832 u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize;
3833 u_int32_t wordsPerBlock = blockSize / kBytesPerWord;
3834 u_int32_t offset = 0;
3835 u_int32_t size = 0;
3836
3837 /*
3838 * Read the appropriate block from the bitmap file. ReadBitmapBlock
3839 * figures out which actual on-disk block corresponds to the bit we're
3840 * looking at.
3841 */
3842 error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef);
3843 if (error) {
3844 return error;
3845 }
3846
3847 /* curAllocBlock represents the logical block we're analyzing. */
3848 curAllocBlock = startbit;
3849
3850 /* Figure out which word curAllocBlock corresponds to in the block we read */
3851 wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;
3852
3853 /* Scan a word at a time */
3854 while (wordIndexInBlock < wordsPerBlock) {
3855 u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
3856 u_int32_t curBit;
3857
3858 /* modulate curBit because it may start in the middle of a word */
3859 for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {
3860
3861 u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
3862 if (ALLOC_DEBUG) {
3863 u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer);
3864 if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) {
3865 panic("hfs_alloc_scan: curAllocBit %u, curBit (%d), word (0x%x), is_allocated (0x%x) res(0x%x) \n",
3866 curAllocBlock, curBit, currentWord, is_allocated, res);
3867 }
3868 }
3869 /*
3870 * If curBit is not allocated, keep track of the start of the free range.
3871 * Increment a running tally on how many free blocks in a row we've seen.
3872 */
3873 if (!is_allocated) {
3874 size++;
3875 if (offset == 0) {
3876 offset = curAllocBlock;
3877 }
3878 }
3879 else {
3880 /*
3881 * If we hit an allocated block, insert the extent that tracked the range
3882 * we saw, and reset our tally counter.
3883 */
3884 if (size != 0) {
3885 #if CONFIG_HFS_ALLOC_RBTREE
3886 extent_tree_free_space(&hfsmp->offset_tree, size, offset);
3887 #endif
3888 hfs_track_unmap_blocks (hfsmp, offset, size, list);
3889 size = 0;
3890 offset = 0;
3891 }
3892 }
3893 curAllocBlock++;
3894 /*
3895 * Exit early if the next bit we'd analyze would take us beyond the end of the
3896 * range that we're supposed to scan.
3897 */
3898 if (curAllocBlock >= endBit) {
3899 goto DoneScanning;
3900 }
3901 }
3902 wordIndexInBlock++;
3903 }
3904 DoneScanning:
3905
3906 /* We may have been tracking a range of free blocks that hasn't been inserted yet. */
3907 if (size != 0) {
3908 #if CONFIG_HFS_ALLOC_RBTREE
3909 extent_tree_free_space(&hfsmp->offset_tree, size, offset);
3910 #endif
3911 hfs_track_unmap_blocks (hfsmp, offset, size, list);
3912 }
3913 /*
3914 * curAllocBlock represents the next block we need to scan while we're in this
3915 * function.
3916 */
3917 *bitToScan = curAllocBlock;
3918
3919 ReleaseScanBitmapBlock(blockRef);
3920
3921 return 0;
3922 }
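
/*
 * A user-space sketch of the free-run tally above (illustrative only): walk
 * a big-endian-style bitmap word array and report each maximal run of zero
 * bits. Unlike the kernel loop, this sketch uses size == 0 rather than
 * offset == 0 as the "not currently in a run" sentinel, so it also handles
 * a run that starts at block 0 (the kernel loop can rely on allocation
 * block 0 never being free, since it holds the reserved boot blocks).
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static void report_free_runs(const uint32_t *words, uint32_t nwords)
{
	uint32_t offset = 0, size = 0, block = 0;
	uint32_t w;
	int b;

	for (w = 0; w < nwords; w++) {
		for (b = 31; b >= 0; b--, block++) {
			if (words[w] & (1u << b)) {	/* bit set: allocated */
				if (size != 0) {
					printf("free run: start %u, length %u\n", offset, size);
					size = 0;
				}
			} else {			/* bit clear: free */
				if (size++ == 0)
					offset = block;
			}
		}
	}
	if (size != 0)	/* a run was still open at the end of the range */
		printf("free run: start %u, length %u\n", offset, size);
}
#endif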
3923
3924
3925 /*
3926 * This function is basically the same as hfs_isallocated, except it's designed for
3927 * use with the red-black tree validation code. It assumes we're only checking whether
3928 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
3929 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
3930 *
3931 * This should not be called in general purpose scanning code.
3932 */
3933 int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
3934
3935 u_int32_t *currentWord; // Pointer to current word within bitmap block
3936 u_int32_t bitMask; // Word with given bits already set (ready to test)
3937 u_int32_t firstBit; // Bit index within word of first bit to allocate
3938 u_int32_t numBits; // Number of bits in word to allocate
3939 u_int32_t bitsPerBlock;
3940 uintptr_t blockRef;
3941 u_int32_t wordsPerBlock;
3942 u_int32_t numBlocks = 1;
3943 u_int32_t *buffer = NULL;
3944
3945 int inuse = 0;
3946 int error;
3947
3948
3949 if (bp_buf) {
3950 /* just use the passed-in buffer if available */
3951 buffer = bp_buf;
3952 }
3953 else {
3954 /*
3955 * Pre-read the bitmap block containing the first word of allocation
3956 */
3957 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3958 if (error)
3959 return (error);
3960 }
3961
3962 /*
3963 * Initialize currentWord, and wordsLeft.
3964 */
3965 u_int32_t wordIndexInBlock;
3966
3967 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3968 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
3969
3970 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3971 currentWord = buffer + wordIndexInBlock;
3972
3973 /*
3974 * First test any non word aligned bits.
3975 */
3976 firstBit = startingBlock % kBitsPerWord;
3977 bitMask = kAllBitsSetInWord >> firstBit;
3978 numBits = kBitsPerWord - firstBit;
3979 if (numBits > numBlocks) {
3980 numBits = numBlocks;
3981 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
3982 }
3983 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3984 inuse = 1;
3985 goto Exit;
3986 }
3987 numBlocks -= numBits;
3988 ++currentWord;
3989
3990 Exit:
3991 if(bp_buf == NULL) {
3992 if (buffer) {
3993 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
3994 }
3995 }
3996 return (inuse);
3997 }
4001
4002 #if CONFIG_HFS_ALLOC_RBTREE
4003
4004 /*
4005 * Extern function that is called from mount and upgrade mount routines
4006 * that enable us to initialize the tree.
4007 */
4008
4009 __private_extern__
4010 u_int32_t InitTree(struct hfsmount *hfsmp) {
4011 extent_tree_init (&(hfsmp->offset_tree));
4012 return 0;
4013 }
4014
4015
4016 /*
4017 * This function builds the trees specified in its arguments. It uses
4018 * buf_meta_breads to scan through the bitmap and re-build the tree state.
4019 * It is very important to use buf_meta_bread because we need to ensure that we
4020 * read the most current version of the blocks that we're scanning. If we used
4021 * cluster_io, then journaled transactions could still be sitting in RAM since they are
4022 * written to disk in the proper location asynchronously.
4023 *
4024 * Because this could be still running when mount has finished, we need to check
4025 * after every allocation block that we're working on if an unmount or some other
4026 * operation that would cause us to tear down has come in (think downgrade mount).
4027 * If an unmount has come in, then abort whatever we're doing and return -1
4028 * to indicate we hit an error. If we don't do this, we'd hold up unmount for
4029 * a very long time.
4030 *
4031 * This function assumes that the bitmap lock is acquired exclusively before being
4032 * called. It will drop the lock and then re-acquire it during operation, but
4033 * will always return with the lock held.
4034 */
4035 __private_extern__
4036 u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {
4037
4038 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
4039
4040 u_int32_t *cur_block_eof;
4041 int error = 0;
4042
4043 int USE_FINE_GRAINED_LOCKING = 0;
4044
4045 /* Initialize the block counter while we hold the bitmap lock */
4046 cur_block_eof = &hfsmp->offset_block_end;
4047
4048 /*
4049 * This loop advances over all allocation bitmap blocks of the current region
4050 * to scan them and add the results into the red-black tree. We use the mount point
4051 * variable offset_block_end as our loop counter. This gives us flexibility
4052 * because we can release the allocation bitmap lock and allow a thread that wants
4053 * to make an allocation to grab the lock and do some scanning on our behalf while we're
4054 * waiting to re-acquire the lock. Then, the allocating thread will only do as much bitmap
4055 * scanning as needed to fulfill its allocation.
4056 *
4057 * If the other thread does IO for us, then it will update the offset_block_end
4058 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
4059 * scanning. So when we re-grab the lock, our current EOF/loop will immediately skip us to the next
4060 * block that needs scanning.
4061 */
4062
4063 while (*cur_block_eof < endBlock) {
4064
4065 /*
4066 * If the filesystem is being resized before the bitmap has been fully scanned, we'll
4067 * update our endBlock to match the current allocation limit in the hfsmp struct.
4068 * The allocLimit field would only be updated while holding the bitmap lock, so we won't
4069 * be executing this code at the same time that the resize is going on.
4070 */
4071 if ((initialscan) && (endBlock != hfsmp->allocLimit)) {
4072
4073 /* If we're past the new/modified allocLimit, then just stop immediately.*/
4074 if (*cur_block_eof >= hfsmp->allocLimit ) {
4075 break;
4076 }
4077 endBlock = hfsmp->allocLimit;
4078 }
4079
4080 /*
4081 * TODO: fix unmount stuff!
4082 * See rdar://7391404
4083 *
4084 * Once the RB allocator is checked in, we'll want to augment it to not hold the
4085 * allocation bitmap lock for the entire duration of the tree scan. For a first check-in
4086 * it's ok to do that but we can't leave it like that forever.
4087 *
4088 * The gist of the new algorithm will work as follows:
4089 * if an unmount is in flight and has been detected:
4090 * abort tree-build.
4091 * unset tree-in-progress bit.
4092 * wakeup unmount thread
4093 * unlock allocation bitmap lock, fail out.
4094 *
4095 * The corresponding code in the unmount side should already be in place.
4096 */
4097
4098 error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof, NULL); /* no trim list during tree build */
4099
4100 //TODO: Fix this below!
4101 if (USE_FINE_GRAINED_LOCKING){
4102 hfs_systemfile_unlock(hfsmp, *flags);
4103 *flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4104 }
4105 //TODO: Infer that if *flags == 0, we don't actually need to lock/unlock.
4106 }
4107
4108 return error;
4109 }
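
/*
 * A user-space analogue of the shared-progress loop above (illustrative
 * only; every name here is hypothetical): several threads advance a single
 * shared cursor under a lock, so whichever thread runs next resumes from
 * wherever the previous one stopped and no unit of work is repeated.
 */
#if 0
#include <pthread.h>

struct scan_state {
	pthread_mutex_t lock;	/* stands in for the bitmap lock */
	unsigned cur;		/* stands in for hfsmp->offset_block_end */
	unsigned end;
};

/* Stands in for hfs_alloc_scan_block: process one unit of the bitmap. */
static void scan_one_unit(unsigned unit)
{
	(void)unit;
}

static void cooperative_scan(struct scan_state *st)
{
	pthread_mutex_lock(&st->lock);
	while (st->cur < st->end) {
		scan_one_unit(st->cur);
		st->cur++;
		/* Drop and re-take the lock so another thread may grab it
		 * and advance st->cur on our behalf while we wait. */
		pthread_mutex_unlock(&st->lock);
		pthread_mutex_lock(&st->lock);
	}
	pthread_mutex_unlock(&st->lock);
}
#endif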
4110
4111 /*
4112 * This function destroys the specified rb-trees associated with the mount point.
4113 */
4114 __private_extern__
4115 void DestroyTrees(struct hfsmount *hfsmp) {
4116
4117 if (ALLOC_DEBUG) {
4118 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
4119 printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
4120 hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end );
4121 }
4122
4123 /*
4124 * extent_tree_destroy will start with the first entry in the tree (by offset), then
4125 * iterate through the tree quickly using its embedded linked list. This results in tree
4126 * destruction in O(n) time.
4127 */
4128
4129 if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
4130 extent_tree_destroy(&hfsmp->offset_tree);
4131
4132 /* Mark Trees as disabled */
4133 hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;
4134 }
4135
4136 return;
4137 }
4138
4139 #endif
4140
4141 /*
4142 * This function resets all of the data structures relevant to the
4143 * free extent cache stored in the hfsmount struct.
4144 *
4145 * If we are using the red-black tree code then we need to account for the fact that
4146 * we may encounter situations where we need to jettison the tree. If that is the
4147 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
4148 * the free ext cache is zeroed before we start using it.
4149 *
4150 * We also reset and disable the cache when allocLimit is updated... which
4151 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
4152 * It is independent of the type of allocator being used currently.
4153 */
4154 void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
4155 {
4156 int bytes;
4157 void *freeExt;
4158
4159 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4160 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
4161
4162 lck_spin_lock(&hfsmp->vcbFreeExtLock);
4163
4164 /* reset Free Extent Count */
4165 hfsmp->vcbFreeExtCnt = 0;
4166
4167 /* reset the actual array */
4168 bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
4169 freeExt = (void*)(hfsmp->vcbFreeExt);
4170
4171 bzero (freeExt, bytes);
4172
4173 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4174
4175 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4176 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
4177
4178 return;
4179 }
4180
4181 /*
4182 * This function is used to inform the allocator if we have to effectively shrink
4183 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
4184 *
4185 * The bitmap lock must be held when calling this function. This function also modifies the
4186 * allocLimit field in the hfs mount point structure in the general case.
4187 *
4188 * In the shrinking case, we'll have to remove all free extents from the red-black
4189 * tree past the specified offset new_end_block. In the growth case, we'll have to force
4190 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
4191 *
4192 * new_end_block represents the total number of blocks available for allocation in the resized
4193 * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
4194 * will be outside the [0, n-1] range that is indexable in the bitmap.
4195 *
4196 * Returns 0 on success
4197 * errno on failure
4198 */
4199 __private_extern__
4200 u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
4201
4202 /*
4203 * Update allocLimit to the argument specified, but don't do anything else
4204 * if the red/black tree is not enabled.
4205 */
4206 hfsmp->allocLimit = new_end_block;
4207
4208 /* Invalidate the free extent cache completely so that
4209 * it does not have any extents beyond end of current
4210 * volume.
4211 */
4212 ResetVCBFreeExtCache(hfsmp);
4213
4214 #if CONFIG_HFS_ALLOC_RBTREE
4215 /* Shrinking the existing filesystem */
4216 if ((new_end_block < hfsmp->offset_block_end) &&
4217 (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
4218 extent_node_t search_sentinel;
4219 extent_node_t *node = NULL;
4220 /* Remover points to the current item to free/remove from the tree */
4221 extent_node_t *remover = NULL;
4222
4223 /* Begin search at the specified offset */
4224 memset (&search_sentinel, 0, sizeof(extent_node_t));
4225 search_sentinel.offset = new_end_block;
4226
4227 /*
4228 * Find the free extent at or before the new alloc limit by searching
4229 * from the starting point or 1 earlier. We may need to split apart an existing node
4230 * if it straddles the new alloc limit.
4231 */
4232 node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
4233 if (node) {
4234 /* If it's an exact match, then just remove them all from this point forward */
4235 if (node->offset == new_end_block) {
4236 /*
4237 * Find the previous entry and update its next pointer to NULL
4238 * since this entry is biting the dust. Update remover to node.
4239 */
4240 extent_node_t *prev = NULL;
4241 prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
4242 if (prev) {
4243 prev->offset_next = NULL;
4244 }
4245 remover = node;
4246 }
4247 else {
4248 /* See if we need to split this node */
4249 if ((node->offset + node->length) > new_end_block) {
4250 /*
4251 * Update node to reflect its new size up until new_end_block.
4252 */
4253 remover = node->offset_next;
4254 node->length = new_end_block - node->offset;
4255 /* node is becoming the last free extent in the volume. */
4256 node->offset_next = NULL;
4257 }
4258 else {
4259 if (node->offset_next == NULL) {
4260 /*
4261 * 'node' points to the last free extent in the volume.
4262 * Coincidentally, it is also before the new cut-off point at which
4263 * we will stop representing bitmap values in the tree. Just bail out now.
4264 */
4265 return 0;
4266 }
4267 /*
4268 * Otherwise, point our temp variable 'remover' to the node where
4269 * we'll need to start yanking things out of the tree, and make 'node'
4270 * the last element in the tree in the linked list.
4271 */
4272 remover = node->offset_next;
4273 if (remover->offset <= new_end_block) {
4274 panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
4275 }
4276 node->offset_next = NULL;
4277 }
4278 }
4279
4280 /*
4281 * Remover is our "temp" pointer that points to the current node to remove from
4282 * the offset tree. We'll simply iterate through the tree linked list, removing the current
4283 * element from the tree, freeing them as we come across them.
4284 */
4285 while (remover) {
4286 extent_node_t *next = remover->offset_next;
4287 extent_tree_remove_node (&hfsmp->offset_tree, remover);
4288 free_node (remover);
4289 remover = next;
4290 }
4291
4292 if (ALLOC_DEBUG) {
4293 printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
4294 hfs_validate_rbtree (hfsmp, 0, new_end_block);
4295 }
4296
4297 /*
4298 * Don't forget to shrink offset_block_end after a successful truncation
4299 * new_end_block should represent the number of blocks available on the
4300 * truncated volume.
4301 */
4302
4303 hfsmp->offset_block_end = new_end_block;
4304
4305 return 0;
4306 }
4307 else {
4308 if (ALLOC_DEBUG) {
4309 panic ("UpdateAllocLimit: no prev!");
4310 }
4311 return ENOSPC;
4312 }
4313 }
4314 /* Growing the existing filesystem */
4315 else if ((new_end_block > hfsmp->offset_block_end) &&
4316 (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
4317 int flags = 0;
4318 int retval = 0;
4319
4320 if (ALLOC_DEBUG) {
4321 printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
4322 hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
4323 }
4324
4325
4326 retval = GenerateTree (hfsmp, new_end_block, &flags, 0);
4327
4328 /*
4329 * Don't forget to update offset_block_end after a successful tree extension.
4330 */
4331 if (retval == 0) {
4332
4333 if (ALLOC_DEBUG) {
4334 printf ("UpdateAllocLimit: Validating rbtree after growth\n");
4335 hfs_validate_rbtree (hfsmp, 0, new_end_block);
4336 }
4337
4338 hfsmp->offset_block_end = new_end_block;
4339 }
4340
4341 return retval;
4342 }
4343 /* Otherwise, do nothing; fall through to the code below. */
4344 printf ("error : off_block_end: %d, alloclimit: %d, new_end_block: %d\n",
4345 hfsmp->offset_block_end,hfsmp->allocLimit, new_end_block);
4346 #endif
4347
4348 return 0;
4349
4350 }
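
/*
 * A worked example of the shrink path above (values hypothetical): with
 * free extents (40, 10) -> (90, 30) in the tree and new_end_block = 100,
 * node (90, 30) straddles the cut, so it is trimmed to (90, 10), its
 * offset_next is set to NULL, and every node after it is removed. Had the
 * search landed exactly on offset 100, that node and all of its successors
 * would have been removed outright.
 */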
4351
4352
4353 /*
4354 * Remove an extent from the list of free extents.
4355 *
4356 * This is a low-level routine. It does not handle overlaps or splitting;
4357 * that is the responsibility of the caller. The extent at the given
4358 * index is removed from the list, and any following
4359 * extents in the list are shifted up.
4360 *
4361 * Inputs:
4362 * index - index (within vcbFreeExt) of the extent to remove
4363 *
4364 * Result:
4365 * None. If the index is out of range, the routine panics when debugging
4366 * is enabled, and is otherwise a no-op.
4367 */
4368 static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
4369 {
4370 if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
4371 if (ALLOC_DEBUG)
4372 panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4373 else
4374 printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4375 return;
4376 }
4377 int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
4378 if (shift_count > 0) {
4379 memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4380 }
4381 hfsmp->vcbFreeExtCnt--;
4382 }
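
/*
 * For example, with vcbFreeExtCnt == 3 and entries [A, B, C], removing
 * index 1 memmoves C down one slot, leaving [A, C] and a count of 2.
 */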
4383
4384
4385 /*
4386 * Add an extent to the list of free extents.
4387 *
4388 * This is a low-level routine. It does not handle overlaps or coalescing;
4389 * that is the responsibility of the caller. This routine *does* make
4390 * sure that the extent it is adding is inserted in the correct location.
4391 * If the list is full, this routine will handle either removing the last
4392 * extent in the list to make room for the new extent, or ignoring the
4393 * new extent if it is "worse" than the last extent in the list.
4394 *
4395 * Inputs:
4396 * startBlock - Start of extent to add
4397 * blockCount - Number of blocks in extent to add
4398 *
4399 * Result:
4400 * The index where the extent was inserted, or kMaxFreeExtents
4401 * if the extent was not inserted (the list was full, and the extent
4402 * being added was "worse" than everything in the list).
4403 */
4404 static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4405 {
4406 uint32_t i;
4407
4408 /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
4409 if (ALLOC_DEBUG) {
4410 uint32_t endBlock = startBlock + blockCount;
4411 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4412 if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
4413 startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
4414 continue;
4415 }
4416 panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
4417 hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
4418 }
4419 }
4420
4421 /* Figure out what index the new extent should be inserted at. */
4422 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4423 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4424 /* The list is sorted by increasing offset. */
4425 if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
4426 break;
4427 }
4428 } else {
4429 /* The list is sorted by decreasing size. */
4430 if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
4431 break;
4432 }
4433 }
4434 }
4435
4436 /* When we get here, i is the index where the extent should be inserted. */
4437 if (i == kMaxFreeExtents) {
4438 /*
4439 * The new extent is worse than anything already in the list,
4440 * and the list is full, so just ignore the extent to be added.
4441 */
4442 return i;
4443 }
4444
4445 /*
4446 * Grow the list (if possible) to make room for an insert.
4447 */
4448 if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
4449 hfsmp->vcbFreeExtCnt++;
4450
4451 /*
4452 * If we'll be keeping any extents after the insert position, then shift them.
4453 */
4454 int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
4455 if (shift_count > 0) {
4456 memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4457 }
4458
4459 /* Finally, store the new extent at its correct position. */
4460 hfsmp->vcbFreeExt[i].startBlock = startBlock;
4461 hfsmp->vcbFreeExt[i].blockCount = blockCount;
4462 return i;
4463 }
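
/*
 * A user-space sketch of the insert-into-a-sorted-fixed-size-list shape
 * used above, for the non-sparse (sorted by decreasing size) case.
 * Illustrative only; MAX_EXTS stands in for kMaxFreeExtents.
 */
#if 0
#include <stdint.h>
#include <string.h>

#define MAX_EXTS 10

struct ext { uint32_t start, count; };

/* Returns the insertion index, or MAX_EXTS if the list was full and the
 * new extent was smaller than everything already cached. When the list is
 * full and the insert succeeds, the last (smallest) entry falls off. */
static int insert_by_size(struct ext *list, uint32_t *cnt,
		uint32_t start, uint32_t count)
{
	uint32_t i;
	int shift;

	for (i = 0; i < *cnt; i++) {
		if (count > list[i].count)
			break;
	}
	if (i == MAX_EXTS)
		return MAX_EXTS;
	if (*cnt < MAX_EXTS)
		(*cnt)++;
	shift = (int)(*cnt - i - 1);
	if (shift > 0)
		memmove(&list[i + 1], &list[i], shift * sizeof(list[0]));
	list[i].start = start;
	list[i].count = count;
	return (int)i;
}
#endif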
4464
4465
4466 /*
4467 * Remove an entry from free extent cache after it has been allocated.
4468 *
4469 * This is a high-level routine. It handles removing a portion of a
4470 * cached extent, potentially splitting it into two (if the cache was
4471 * already full, throwing away the extent that would sort last). It
4472 * also handles removing an extent that overlaps multiple extents in
4473 * the cache.
4474 *
4475 * Inputs:
4476 * hfsmp - mount point structure
4477 * startBlock - starting block of the extent to be removed.
4478 * blockCount - number of blocks of the extent to be removed.
4479 */
4480 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4481 {
4482 u_int32_t i, insertedIndex;
4483 u_int32_t currentStart, currentEnd, endBlock;
4484 int extentsRemoved = 0;
4485
4486 #if CONFIG_HFS_ALLOC_RBTREE
4487 /* If red-black tree is enabled, no free extent cache is necessary */
4488 if (hfs_isrbtree_active(hfsmp) == true) {
4489 return;
4490 }
4491 #endif
4492
4493 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4494 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
4495
4496 endBlock = startBlock + blockCount;
4497
4498 lck_spin_lock(&hfsmp->vcbFreeExtLock);
4499
4500 /*
4501 * Iterate over all of the extents in the free extent cache, removing or
4502 * updating any entries that overlap with the input extent.
4503 */
4504 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4505 currentStart = hfsmp->vcbFreeExt[i].startBlock;
4506 currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
4507
4508 /*
4509 * If the current extent is entirely before or entirely after the
4510 * extent to be removed, then we keep it as-is.
4511 */
4512 if (currentEnd <= startBlock || currentStart >= endBlock) {
4513 continue;
4514 }
4515
4516 /*
4517 * If the extent being removed entirely contains the current extent,
4518 * then remove the current extent.
4519 */
4520 if (startBlock <= currentStart && endBlock >= currentEnd) {
4521 remove_free_extent_list(hfsmp, i);
4522
4523 /*
4524 * We just removed the extent at index i. The extent at
4525 * index i+1 just got shifted to index i. So decrement i
4526 * to undo the loop's "++i", and the next iteration will
4527 * examine index i again, which contains the next extent
4528 * in the list.
4529 */
4530 --i;
4531 ++extentsRemoved;
4532 continue;
4533 }
4534
4535 /*
4536 * If the extent being removed is strictly "in the middle" of the
4537 * current extent, then we need to split the current extent into
4538 * two discontiguous extents (the "head" and "tail"). The good
4539 * news is that we don't need to examine any other extents in
4540 * the list.
4541 */
4542 if (startBlock > currentStart && endBlock < currentEnd) {
4543 remove_free_extent_list(hfsmp, i);
4544 add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4545 add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4546 break;
4547 }
4548
4549 /*
4550 * The only remaining possibility is that the extent to be removed
4551 * overlaps the start or end (but not both!) of the current extent.
4552 * So we need to replace the current extent with a shorter one.
4553 *
4554 * The only tricky part is that the updated extent might be at a
4555 * different index than the original extent. If the updated extent
4556 * was inserted after the current extent, then we need to re-examine
4557 * the entry at index i, since it now contains the extent that was
4558 * previously at index i+1. If the updated extent was inserted
4559 * before or at the same index as the removed extent, then the
4560 * following extents haven't changed position.
4561 */
4562 remove_free_extent_list(hfsmp, i);
4563 if (startBlock > currentStart) {
4564 /* Remove the tail of the current extent. */
4565 insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4566 } else {
4567 /* Remove the head of the current extent. */
4568 insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4569 }
4570 if (insertedIndex > i) {
4571 --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */
4572 }
4573 }
4574
4575 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4576
4577 sanity_check_free_ext(hfsmp, 0);
4578
4579 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4580 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
4581
4582 return;
4583 }
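
/*
 * A worked example of the middle-overlap case above: if the cache holds
 * (100, 50) and blocks (120, 10) are allocated out of it, that entry is
 * removed and replaced by the head (100, 20) and the tail (130, 20).
 */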
4584
4585
4586 /*
4587 * Add an entry to free extent cache after it has been deallocated.
4588 *
4589 * This is a high-level routine. It will merge overlapping or contiguous
4590 * extents into a single, larger extent.
4591 *
4592 * If the extent provided has blocks beyond current allocLimit, it is
4593 * clipped to allocLimit (so that we won't accidentally find and allocate
4594 * space beyond allocLimit).
4595 *
4596 * Inputs:
4597 * hfsmp - mount point structure
4598 * startBlock - starting block of the extent to be removed.
4599 * blockCount - number of blocks of the extent to be removed.
4600 *
4601 * Returns:
4602 * true - if the extent was added successfully to the list
4603 * false - if the extent was not added to the list, maybe because
4604 * the extent was beyond allocLimit, or was not the best
4605 * candidate to be put in the cache.
4606 */
4607 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4608 {
4609 Boolean retval = false;
4610 uint32_t endBlock;
4611 uint32_t currentEnd;
4612 uint32_t i;
4613
4614 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4615 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
4616
4617 /*
4618 * If using the red-black tree allocator, then there's no need to special case
4619 * for the sparse device case. We'll simply add the region we've recently freed
4620 * to the red-black tree, where it will get sorted by offset and length. The only special
4621 * casing will need to be done on the allocation side, where we may favor free extents
4622 * based on offset even if it will cause fragmentation. This may be true, for example, if
4623 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image.
4624 */
4625 #if CONFIG_HFS_ALLOC_RBTREE
4626 if (hfs_isrbtree_active(hfsmp) == true) {
4627 goto out_not_locked;
4628 }
4629 #endif
4630
4631 /* No need to add extent that is beyond current allocLimit */
4632 if (startBlock >= hfsmp->allocLimit) {
4633 goto out_not_locked;
4634 }
4635
4636 /* If end of the free extent is beyond current allocLimit, clip the extent */
4637 if ((startBlock + blockCount) > hfsmp->allocLimit) {
4638 blockCount = hfsmp->allocLimit - startBlock;
4639 }
4640
4641 lck_spin_lock(&hfsmp->vcbFreeExtLock);
4642
4643 /*
4644 * Make a pass through the free extent cache, looking for known extents that
4645 * overlap or are contiguous with the extent to be added. We'll remove those
4646 * extents from the cache, and incorporate them into the new extent to be added.
4647 */
4648 endBlock = startBlock + blockCount;
4649 for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
4650 currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
4651 if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
4652 /* Extent i does not overlap and is not contiguous, so keep it. */
4653 continue;
4654 } else {
4655 /* We need to remove extent i and combine it with the input extent. */
4656 if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
4657 startBlock = hfsmp->vcbFreeExt[i].startBlock;
4658 if (currentEnd > endBlock)
4659 endBlock = currentEnd;
4660
4661 remove_free_extent_list(hfsmp, i);
4662 /*
4663 * We just removed the extent at index i. The extent at
4664 * index i+1 just got shifted to index i. So decrement i
4665 * to undo the loop's "++i", and the next iteration will
4666 * examine index i again, which contains the next extent
4667 * in the list.
4668 */
4669 --i;
4670 }
4671 }
4672 retval = (add_free_extent_list(hfsmp, startBlock, endBlock - startBlock) < kMaxFreeExtents); /* true if actually inserted */
4673
4674 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4675
4676 out_not_locked:
4677 sanity_check_free_ext(hfsmp, 0);
4678
4679 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4680 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
4681
4682 return retval;
4683 }
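
/*
 * A worked example of the coalescing above: freeing (120, 10) when the
 * cache holds (100, 20) and (130, 20) absorbs both neighbors, since each
 * is contiguous with the freed run, and inserts the single extent (100, 50).
 */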
4684
4685 /* Debug function to check if the free extent cache is good or not */
4686 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
4687 {
4688 u_int32_t i, j;
4689
4690 /* Do not do anything if debug is not on, or if we're using the red-black tree */
4691 if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
4692 return;
4693 }
4694
4695 lck_spin_lock(&hfsmp->vcbFreeExtLock);
4696
4697 if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
4698 panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
4699
4700 /*
4701 * Iterate the Free extent cache and ensure no entries are bogus or refer to
4702 * allocated blocks.
4703 */
4704 for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
4705 u_int32_t start, nblocks;
4706
4707 start = hfsmp->vcbFreeExt[i].startBlock;
4708 nblocks = hfsmp->vcbFreeExt[i].blockCount;
4709
4710 //printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);
4711
4712 /* Check if any of the blocks in free extent cache are allocated.
4713 * This should not always be enabled because it might take
4714 * a very long time for large extents that get added to the list.
4715 *
4716 * We have to drop vcbFreeExtLock while we call hfs_isallocated
4717 * because it is going to do I/O. Note that the free extent
4718 * cache could change. That's a risk we take when using this
4719 * debugging code. (Another alternative would be to try to
4720 * detect when the free extent cache changed, and perhaps
4721 * restart if the list changed while we dropped the lock.)
4722 */
4723 if (check_allocated) {
4724 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4725 if (hfs_isallocated(hfsmp, start, nblocks)) {
4726 panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
4727 hfsmp, i, start, nblocks);
4728 }
4729 lck_spin_lock(&hfsmp->vcbFreeExtLock);
4730 }
4731
4732 /* Check if any part of the extent is beyond allocLimit */
4733 if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
4734 panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
4735 hfsmp, i, start, nblocks, hfsmp->allocLimit);
4736 }
4737
4738 /* Check if there are any duplicate start blocks */
4739 for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
4740 if (start == hfsmp->vcbFreeExt[j].startBlock) {
4741 panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
4742 hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
4743 hfsmp->vcbFreeExt[j].blockCount);
4744 }
4745 }
4746
4747 /* Check if the entries are out of order */
4748 if ((i+1) != hfsmp->vcbFreeExtCnt) {
4749 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4750 /* sparse devices are sorted by starting block number (ascending) */
4751 if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
4752 panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
4753 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
4754 hfsmp->vcbFreeExt[i+1].blockCount);
4755 }
4756 } else {
4757 /* normally sorted by block count (descending) */
4758 if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
4759 panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
4760 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
4761 hfsmp->vcbFreeExt[i+1].blockCount);
4762 }
4763 }
4764 }
4765 }
4766 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4767 }