]> git.saurik.com Git - apple/hfs.git/blob - core/VolumeAllocation.c
hfs-407.1.3.tar.gz
[apple/hfs.git] / core / VolumeAllocation.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 File: VolumeAllocation.c
30
31 Contains: Routines for accessing and modifying the volume bitmap.
32
33 Version: HFS Plus 1.0
34
35 Copyright: (c) 1996-2009 by Apple Inc., all rights reserved.
36
37 */
38
39 /*
40 Public routines:
41 BlockAllocate / hfs_block_alloc
42 Allocate space on a volume. Can allocate space contiguously.
43 If not contiguous, then allocation may be less than what was
44 asked for. Returns the starting block number, and number of
45 blocks. It will only return a single extent.
46
47 BlockDeallocate
48 Deallocate a contiguous run of allocation blocks.
49
50 BlockMarkAllocated
51 Exported wrapper to mark blocks as in-use. This will correctly determine
52 whether or not the red-black tree is enabled and call the appropriate function
53 if applicable.
54 BlockMarkFree
55 Exported wrapper to mark blocks as freed. This will correctly determine whether or
56 not the red-black tree is enabled and call the appropriate function if applicable.
57
58
59 ResetVCBFreeExtCache
60 Since the red-black tree obviates the need to maintain the free extent cache, we do
61 not update it if the tree is also live. As a result, if we ever need to destroy the trees
62 we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
63 bitmap scanning allocator.
64 We also reset and disable the free extent cache when volume resizing is
65 in flight.
66
67 UpdateAllocLimit
68 Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
69 allocations from occupying space in the region we are modifying during a filesystem resize.
70 At other times, it should be consistent with the total number of allocation blocks in the
71 filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
72 know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
73 number of items in the tree that we can allocate from.
74
75 ScanUnmapBlocks
76 Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it
77 tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core
78 summary table of the allocation bitmap.
79
80 Internal routines:
81 BlockMarkFreeInternal
82 Mark a contiguous range of blocks as free. The corresponding
83 bits in the volume bitmap will be cleared. This will actually do the work
84 of modifying the bitmap for us.
85
86 BlockMarkAllocatedInternal
87 Mark a contiguous range of blocks as allocated. The cor-
88 responding bits in the volume bitmap are set. Also tests to see
89 if any of the blocks were previously unallocated.
90 BlockFindContiguous
91 Find a contiguous range of blocks of a given size. The caller
92 specifies where to begin the search (by block number). The
93 block number of the first block in the range is returned. This is only
94 called by the bitmap scanning logic as the red-black tree should be able
95 to do this internally by searching its tree.
96 BlockFindAny
97 Find and allocate a contiguous range of blocks up to a given size. The
98 first range of contiguous free blocks found are allocated, even if there
99 are fewer blocks than requested (and even if a contiguous range of blocks
100 of the given size exists elsewhere).
101 BlockFindAnyBitmap
102 Finds a range of blocks per the above requirements without using the
103 Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
104 any valid range of free space needed.
105 BlockFindContig
106 Find a contiguous range of blocks of a given size.
107 If the minimum cannot be satisfied, nothing is
108 returned.
109 BlockFindKnown
110 Try to allocate space from known free space in the volume's
111 free extent cache.
112 ReadBitmapBlock
113 Given an allocation block number, read the bitmap block that
114 contains that allocation block into a caller-supplied buffer.
115
116 ReleaseBitmapBlock
117 Release a bitmap block back into the buffer cache.
118
119 ReadBitmapRange
120 Given an allocation block number, read a range of bitmap that
121 must begin at that allocation block into a caller supplied buffer.
122
123 ReleaseBitmapRange
124 Release and invalidate a buf_t corresponding to the bitmap
125 back into the UBC in order to prevent coherency issues.
126
127 remove_free_extent_cache
128 Remove an extent from the free extent cache. Handles overlaps
129 with multiple extents in the cache, and handles splitting an
130 extent in the cache if the extent to be removed is in the middle
131 of a cached extent.
132
133 add_free_extent_cache
134 Add an extent to the free extent cache. It will merge the
135 input extent with extents already in the cache.
136 CheckUnmappedBytes
137 Check whether or not the current transaction
138 has allocated blocks that were recently freed. This may have data safety implications.
139
140
141
142 Debug/Test Routines
143 hfs_isallocated
144 Test to see if any blocks in a range are allocated. Journal or
145 allocation file lock must be held.
146
147 hfs_isallocated_scan
148 Test to see if any blocks in a range are allocated. Releases and
149 invalidates the block used when finished.
150
151 Optimization Routines
152 hfs_alloc_scan_block
153 Given a starting allocation block number, figures out which physical block contains that
154 allocation block's bit, and scans it from the starting bit until either the ending bit or
155 the end of the block. Free space extents are inserted into the appropriate red-black tree.
156
157 */
158
159
160 #include <sys/types.h>
161 #include <sys/buf.h>
162
163 #if !HFS_ALLOC_TEST
164
165 #include "hfs_macos_defs.h"
166 #include <sys/systm.h>
167 #include <sys/ubc.h>
168 /* For VM Page size */
169 #include <libkern/libkern.h>
170 #include "hfs_journal.h"
171 #include "hfs.h"
172 #include "hfs_endian.h"
173 #include "FileMgrInternal.h"
174
175 #endif // !HFS_ALLOC_TEST
176
177 #include <sys/sysctl.h>
178 #include <sys/disk.h>
179 #include <sys/uio.h>
180 #include <sys/malloc.h>
181
182 #include "hfs_dbg.h"
183 #include "hfs_format.h"
184 #include "hfs_kdebug.h"
185 #include "rangelist.h"
186 #include "hfs_extents.h"
187
188 /* Headers for unmap-on-mount support */
189 #include <sys/disk.h>
190
191 /*
192 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
193 * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
194 * disabled by default because there can be a lot of these events,
195 * and we don't want to overwhelm the kernel debug buffer. If you
196 * want to watch these events in particular, just set the sysctl.)
197 */
198 static int hfs_kdebug_allocation = 0;
199 SYSCTL_DECL(_vfs_generic);
200 HFS_SYSCTL(NODE, _vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system")
201 HFS_SYSCTL(NODE, _vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug")
202 HFS_SYSCTL(INT, _vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations")
203 enum {
204 /*
205 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
206 * BlockDeallocate, including the internal BlockAllocateXxx
207 * routines so we can see how an allocation was satisfied.
208 *
209 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
210 * free extent cache.
211 *
212 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
213 *
214 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
215 * or clearing bits, scanning the bitmap).
216 */
217 HFSDBG_ALLOC_ENABLED = 1,
218 HFSDBG_EXT_CACHE_ENABLED = 2,
219 HFSDBG_UNMAP_ENABLED = 4,
220 HFSDBG_BITMAP_ENABLED = 8
221 };
222
223 enum {
224 kBytesPerWord = 4,
225 kBitsPerByte = 8,
226 kBitsPerWord = 32,
227
228 kBitsWithinWordMask = kBitsPerWord-1
229 };
230
231 #define kLowBitInWordMask 0x00000001ul
232 #define kHighBitInWordMask 0x80000000ul
233 #define kAllBitsSetInWord 0xFFFFFFFFul
234
235 #define HFS_MIN_SUMMARY_BLOCKSIZE 4096
236
237 #define ALLOC_DEBUG 0
238
239 static OSErr ReadBitmapBlock(
240 ExtendedVCB *vcb,
241 u_int32_t bit,
242 u_int32_t **buffer,
243 uintptr_t *blockRef,
244 hfs_block_alloc_flags_t flags);
245
246 static OSErr ReleaseBitmapBlock(
247 ExtendedVCB *vcb,
248 uintptr_t blockRef,
249 Boolean dirty);
250
251 static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
252 HFSPlusExtentDescriptor *extent,
253 hfs_block_alloc_flags_t flags,
254 hfs_alloc_extra_args_t *ap);
255
256 static OSErr BlockFindAny(
257 ExtendedVCB *vcb,
258 u_int32_t startingBlock,
259 u_int32_t endingBlock,
260 u_int32_t maxBlocks,
261 hfs_block_alloc_flags_t flags,
262 Boolean trustSummary,
263 u_int32_t *actualStartBlock,
264 u_int32_t *actualNumBlocks);
265
266 static OSErr BlockFindAnyBitmap(
267 ExtendedVCB *vcb,
268 u_int32_t startingBlock,
269 u_int32_t endingBlock,
270 u_int32_t maxBlocks,
271 hfs_block_alloc_flags_t flags,
272 u_int32_t *actualStartBlock,
273 u_int32_t *actualNumBlocks);
274
275 static OSErr BlockFindContig(
276 ExtendedVCB *vcb,
277 u_int32_t startingBlock,
278 u_int32_t minBlocks,
279 u_int32_t maxBlocks,
280 hfs_block_alloc_flags_t flags,
281 u_int32_t *actualStartBlock,
282 u_int32_t *actualNumBlocks);
283
284 static OSErr BlockFindContiguous(
285 ExtendedVCB *vcb,
286 u_int32_t startingBlock,
287 u_int32_t endingBlock,
288 u_int32_t minBlocks,
289 u_int32_t maxBlocks,
290 Boolean useMetaZone,
291 Boolean trustSummary,
292 u_int32_t *actualStartBlock,
293 u_int32_t *actualNumBlocks,
294 hfs_block_alloc_flags_t flags);
295
296 static OSErr BlockFindKnown(
297 ExtendedVCB *vcb,
298 u_int32_t maxBlocks,
299 u_int32_t *actualStartBlock,
300 u_int32_t *actualNumBlocks);
301
302 static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
303 HFSPlusExtentDescriptor *extent,
304 uint32_t max_blocks,
305 hfs_block_alloc_flags_t flags);
306
307 static OSErr BlockMarkAllocatedInternal (
308 ExtendedVCB *vcb,
309 u_int32_t startingBlock,
310 u_int32_t numBlocks,
311 hfs_block_alloc_flags_t flags);
312
313 static OSErr BlockMarkFreeInternal(
314 ExtendedVCB *vcb,
315 u_int32_t startingBlock,
316 u_int32_t numBlocks,
317 Boolean do_validate);
318
319
320 static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
321 uint32_t **buffer, struct buf **blockRef);
322
323 static OSErr ReleaseScanBitmapRange( struct buf *bp );
324
325 static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
326 u_int32_t numBlocks, struct jnl_trim_list *list);
327
328 static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
329
330 static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
331 u_int32_t startbit,
332 u_int32_t *bitToScan,
333 struct jnl_trim_list *list);
334
335 static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
336 static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
337
338 /* Bitmap Re-use Detection */
static inline int extents_overlap (uint32_t start1, uint32_t len1,
				   uint32_t start2, uint32_t len2) {
	/*
	 * Two half-open extents [start, start+len) intersect exactly when
	 * each one begins before the other one ends.
	 */
	return ((start1 + len1) > start2) && ((start2 + len2) > start1);
}
343
344
345 int hfs_isallocated_scan (struct hfsmount *hfsmp,
346 u_int32_t startingBlock,
347 u_int32_t *bp_buf);
348
349 /* Summary Table Functions */
350 static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
351 static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
352 static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
353 static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
354 static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
355 static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
356 static int hfs_rebuild_summary (struct hfsmount *hfsmp);
357
358 #if 0
359 static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
360 #endif
361
362 /* Used in external mount code to initialize the summary table */
363 int hfs_init_summary (struct hfsmount *hfsmp);
364
365 #if ALLOC_DEBUG
366 void hfs_validate_summary (struct hfsmount *hfsmp);
367 #endif
368
369
370 /* Functions for manipulating free extent cache */
371 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
372 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
373 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
374
375 static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
376
377 /* Functions for getting free extents */
378
/* Iteration state used while walking the volume bitmap to enumerate free extents. */
typedef struct bitmap_context {
	void			*bitmap;		// current bitmap chunk
	uint32_t		run_offset;		// offset (in bits) from start of bitmap to start of current run
	uint32_t		chunk_current;		// next bit to scan in the chunk
	uint32_t		chunk_end;		// number of valid bits in this chunk
	struct hfsmount		*hfsmp;			// mount whose bitmap is being scanned
	struct buf		*bp;			// buffer backing the current chunk
	uint32_t		last_free_summary_bit;	// last marked free summary bit
	int			lockflags;		// NOTE(review): presumably the lock state returned when the bitmap lock was taken — confirm
	uint64_t		lock_start;		// NOTE(review): looks like the time the lock was acquired — confirm against get_more_bits
} bitmap_context_t;
390
391
392 static errno_t get_more_bits(bitmap_context_t *bitmap_ctx);
393 static int bit_count_set(void *bitmap, int start, int end);
394 static int bit_count_clr(void *bitmap, int start, int end);
395 static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count);
396 static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count);
397 static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count);
398 static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set);
399 static int clzll(uint64_t x);
400
401 #if ALLOC_DEBUG
402 /*
403 * Validation Routine to verify that the TRIM list maintained by the journal
404 * is in good shape relative to what we think the bitmap should have. We should
405 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
406 * we panic.
407 */
408 int trim_validate_bitmap (struct hfsmount *hfsmp);
409 int trim_validate_bitmap (struct hfsmount *hfsmp) {
410 u_int64_t blockno_offset;
411 u_int64_t numblocks;
412 int i;
413 int count;
414 u_int32_t startblk;
415 u_int32_t blks;
416 int err = 0;
417 uint32_t alloccount = 0;
418
419 if (hfsmp->jnl) {
420 struct journal *jnl = (struct journal*)hfsmp->jnl;
421 if (jnl->active_tr) {
422 struct jnl_trim_list *trim = &(jnl->active_tr->trim);
423 count = trim->extent_count;
424 for (i = 0; i < count; i++) {
425 blockno_offset = trim->extents[i].offset;
426 blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
427 blockno_offset = blockno_offset / hfsmp->blockSize;
428 numblocks = trim->extents[i].length / hfsmp->blockSize;
429
430 startblk = (u_int32_t)blockno_offset;
431 blks = (u_int32_t) numblocks;
432 err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
433
434 if (err == 0 && alloccount != 0) {
435 panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
436 }
437 }
438 }
439 }
440 return 0;
441 }
442
443 #endif
444
445
446 /*
447 ;________________________________________________________________________________
448 ;
449 ; Routine: hfs_unmap_free_extent
450 ;
451 ; Function: Make note of a range of allocation blocks that should be
452 ; unmapped (trimmed). That is, the given range of blocks no
453 ; longer have useful content, and the device can unmap the
454 ; previous contents. For example, a solid state disk may reuse
455 ; the underlying storage for other blocks.
456 ;
457 ; This routine is only supported for journaled volumes. The extent
458 ; being freed is passed to the journal code, and the extent will
459 ; be unmapped after the current transaction is written to disk.
460 ;
461 ; Input Arguments:
462 ; hfsmp - The volume containing the allocation blocks.
463 ; startingBlock - The first allocation block of the extent being freed.
464 ; numBlocks - The number of allocation blocks of the extent being freed.
465 ;________________________________________________________________________________
466 */
467 static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
468 {
469 u_int64_t offset;
470 u_int64_t length;
471 u_int64_t device_sz;
472 int err = 0;
473
474 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
475 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
476
477 if (ALLOC_DEBUG) {
478 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
479 panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
480 }
481 }
482
483 if (hfsmp->jnl != NULL) {
484 device_sz = hfsmp->hfs_logical_bytes;
485 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
486 length = (u_int64_t) numBlocks * hfsmp->blockSize;
487
488 /* Validate that the trim is in a valid range of bytes */
489 if ((offset >= device_sz) || ((offset + length) > device_sz)) {
490 printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
491 err = EINVAL;
492 }
493
494 if (err == 0) {
495 err = journal_trim_add_extent(hfsmp->jnl, offset, length);
496 if (err) {
497 printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
498 }
499 }
500 }
501
502 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
503 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
504 }
505
506 /*
507 ;________________________________________________________________________________
508 ;
509 ; Routine: hfs_track_unmap_blocks
510 ;
511 ; Function: Make note of a range of allocation blocks that should be
512 ; unmapped (trimmed). That is, the given range of blocks no
513 ; longer have useful content, and the device can unmap the
514 ; previous contents. For example, a solid state disk may reuse
515 ; the underlying storage for other blocks.
516 ;
517 ; This routine is only supported for journaled volumes.
518 ;
519 ; *****NOTE*****:
520 ; This function should *NOT* be used when the volume is fully
521 ; mounted. This function is intended to support a bitmap iteration
522 ; at mount time to fully inform the SSD driver of the state of all blocks
523 ; at mount time, and assumes that there is no allocation/deallocation
524 ;               interference during its iteration.
525 ;
526 ; Input Arguments:
527 ; hfsmp - The volume containing the allocation blocks.
528 ; offset - The first allocation block of the extent being freed.
529 ; numBlocks - The number of allocation blocks of the extent being freed.
530 ; list - The list of currently tracked trim ranges.
531 ;________________________________________________________________________________
532 */
533 static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
534 u_int32_t numBlocks, struct jnl_trim_list *list) {
535
536 u_int64_t offset;
537 u_int64_t length;
538 int error = 0;
539
540 if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) {
541 int extent_no = list->extent_count;
542 offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
543 length = (u_int64_t) numBlocks * hfsmp->blockSize;
544
545
546 list->extents[extent_no].offset = offset;
547 list->extents[extent_no].length = length;
548 list->extent_count++;
549 if (list->extent_count == list->allocated_count) {
550 error = hfs_issue_unmap (hfsmp, list);
551 }
552 }
553
554 return error;
555 }
556
557 /*
558 ;________________________________________________________________________________
559 ;
560 ; Routine: hfs_issue_unmap
561 ;
562 ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
563 ;
564 ; Input Arguments:
565 ; hfsmp - The volume containing the allocation blocks.
566 ; list - The list of currently tracked trim ranges.
567 ;________________________________________________________________________________
568 */
569
/*
 * Issue a single DKIOCUNMAP covering every extent currently batched in
 * `list`, then reset the list so the caller can keep appending to it.
 * Returns 0 on success or the error from the ioctl.
 */
static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
{
	dk_unmap_t unmap;
	int error = 0;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
	}

	/* Nothing to do if no extents have been batched. */
	if (list->extent_count > 0 && list->extents != NULL) {
		bzero(&unmap, sizeof(unmap));
		unmap.extents = list->extents;
		unmap.extentsCount = list->extent_count;

		if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
			KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
		}

#if CONFIG_PROTECT
		/*
		 * If we have not yet completed the first scan through the bitmap, then
		 * optionally inform the block driver below us that this is an initialization
		 * TRIM scan, if it can deal with this information.
		 */
		if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
			unmap.options |= _DK_UNMAP_INITIALIZE;
		}
#endif
		/* Issue a TRIM and flush them out */
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());

		/* Clear the batch regardless of the ioctl result. */
		bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
		bzero (&unmap, sizeof(unmap));
		list->extent_count = 0;
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
	}

	return error;
}
612
613 /*
614 ;________________________________________________________________________________
615 ;
616 ; Routine: hfs_unmap_alloc_extent
617 ;
618 ; Function: Make note of a range of allocation blocks, some of
619 ; which may have previously been passed to hfs_unmap_free_extent,
620 ; is now in use on the volume. The given blocks will be removed
621 ; from any pending DKIOCUNMAP.
622 ;
623 ; Input Arguments:
624 ; hfsmp - The volume containing the allocation blocks.
625 ; startingBlock - The first allocation block of the extent being allocated.
626 ; numBlocks - The number of allocation blocks being allocated.
627 ;________________________________________________________________________________
628 */
629
630 static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
631 {
632 u_int64_t offset;
633 u_int64_t length;
634 int err = 0;
635
636 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
637 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
638
639 if (hfsmp->jnl != NULL) {
640 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
641 length = (u_int64_t) numBlocks * hfsmp->blockSize;
642
643 err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
644 if (err) {
645 printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
646 }
647 }
648
649 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
650 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
651 }
652
653
654 /*
655 ;________________________________________________________________________________
656 ;
657 ; Routine: hfs_trim_callback
658 ;
659 ; Function: This function is called when a transaction that freed extents
660 ; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
661 ; written to the on-disk journal. This routine will add those
662 ; extents to the free extent cache so that they can be reused.
663 ;
664 ; CAUTION: This routine is called while the journal's trim lock
665 ; is held shared, so that no other thread can reuse any portion
666 ; of those extents. We must be very careful about which locks
667 ; we take from within this callback, to avoid deadlock. The
668 ; call to add_free_extent_cache will end up taking the cache's
669 ; lock (just long enough to add these extents to the cache).
670 ;
671 ; CAUTION: If the journal becomes invalid (eg., due to an I/O
672 ; error when trying to write to the journal), this callback
673 ; will stop getting called, even if extents got freed before
674 ; the journal became invalid!
675 ;
676 ; Input Arguments:
677 ; arg - The hfsmount of the volume containing the extents.
678 ; extent_count - The number of extents freed in the transaction.
679 ; extents - An array of extents (byte ranges) that were freed.
680 ;________________________________________________________________________________
681 */
682
683 void
684 hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
685 {
686 uint32_t i;
687 uint32_t startBlock, numBlocks;
688 struct hfsmount *hfsmp = arg;
689
690 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
691 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
692
693 for (i=0; i<extent_count; ++i) {
694 /* Convert the byte range in *extents back to a range of allocation blocks. */
695 startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
696 numBlocks = extents[i].length / hfsmp->blockSize;
697 (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
698 }
699
700 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
701 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
702 }
703
704
705 /*
706 ;________________________________________________________________________________
707 ;
708 ; Routine: CheckUnmappedBytes
709 ;
710 ; Function: From the specified inputs, determine if the extent in question overlaps
711 ; space that was recently freed, where the recently freed space may still be
712 ; lingering in an uncommitted journal transaction. This may have data safety
713 ; implications. The intended use is to decide whether or not to force a journal flush
714 ; before allowing file data I/O to be issued. If we did not do this
715 ; then it would be possible to issue the file I/O ahead of the
716 ; journal, resulting in data being overwritten if the transaction either
717 ; is not committed or cannot be replayed.
718 ;
719 ; NOTE: This function assumes that the journal and catalog/extent locks are held.
720 ;
721 ; Input Arguments:
722 ; hfsmp - The volume containing the allocation blocks.
723 ;       blockno                 - start of the extent in question (in allocation blocks)
724 ;       numblocks               - number of blocks in the extent.
725 ; recently_freed: - output pointer containing whether or not the blocks were freed recently
726 ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks)
727 ;
728 ; Output:
729 ;
730 ; Returns 0 if we could determine extent validity for this (or a previous transaction)
731 ; Returns errno if there was an error
732 ;
733 ; If returned 0, then recently freed will contain a boolean that indicates
734 ; that it was recently freed.
735 ;________________________________________________________________________________
736 */
737
738 u_int32_t
739 CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
740 uint64_t device_offset;
741 uint64_t numbytes;
742 uint32_t err = 0;
743 uint64_t lba_overlap_end;
744
745 if (hfsmp->jnl != NULL) {
746 /*
747 * Convert the allocation block # and the number of blocks into device-relative
748 * offsets so that they can be compared using the TRIM list.
749 */
750 uint64_t device_sz = hfsmp->hfs_logical_bytes;
751 device_offset = blockno * ((uint64_t)hfsmp->blockSize);
752 device_offset += hfsmp->hfsPlusIOPosOffset;
753 numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
754
755 /*
756 * Since we check that the device_offset isn't too large, it's safe to subtract it
757 * from the size in the second check.
758 */
759 if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
760 return EINVAL;
761 }
762
763 /* Ask the journal if this extent overlaps with any pending TRIMs */
764 if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
765 *recently_freed = 1;
766
767 /* Convert lba_overlap_end back into allocation blocks */
768 uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
769 end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
770 *overlap_end = (uint32_t) end_offset;
771 }
772 else {
773 *recently_freed = 0;
774 }
775 err = 0;
776 }
777 else {
778 /* There may not be a journal. In that case, always return success. */
779 *recently_freed = 0;
780 }
781 return err;
782
783 }
784
785
786 /*
787 ;________________________________________________________________________________
788 ;
789 ; Routine: ScanUnmapBlocks
790 ;
791 ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
792 ; device as needed so that the underlying disk device is as
793 ; up-to-date as possible with which blocks are unmapped.
794 ; Additionally build up the summary table as needed.
795 ;
796 ; This function reads the bitmap in large block size
797 ; (up to 1MB) unlike the runtime which reads the bitmap
798 ; in 4K block size. So if this function is being called
799 ; after the volume is mounted and actively modified, the
800 ; caller needs to invalidate all of the existing buffers
801 ; associated with the bitmap vnode before calling this
802 ; function. If the buffers are not invalidated, it can
803 ; cause buf_t collision and potential data corruption.
804 ;
805 ; Input Arguments:
806 ; hfsmp - The volume containing the allocation blocks.
807 ;________________________________________________________________________________
808 */
809
/*
 * Walk the entire allocation bitmap, batching unmapped ranges into a
 * jnl_trim_list and issuing DKIOCUNMAPs as the batch fills (only when
 * the device supports unmap and the mount is read-write).  Returns 0
 * on success or the error from the bitmap scan.
 */
u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
{
	u_int32_t blocks_scanned = 0;
	int error = 0;
	struct jnl_trim_list trimlist;

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
	}

	/*
	 * For reference, the list we fill in below has this shape:
	 *
	 *	struct jnl_trim_list {
	 *		uint32_t allocated_count;
	 *		uint32_t extent_count;
	 *		dk_extent_t *extents;
	 *	};
	 */
	bzero (&trimlist, sizeof(trimlist));

	/*
	 * Any trim related work should be tied to whether the underlying
	 * storage media supports UNMAP, as any solid state device would
	 * on desktop or embedded.
	 *
	 * We do this because we may want to scan the full bitmap on
	 * desktop for spinning media for the purposes of building up the
	 * summary table.
	 *
	 * We also avoid sending TRIMs down to the underlying media if the
	 * mount is read-only.
	 */

	if ((hfsmp->hfs_flags & HFS_UNMAP) &&
	    ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
		/* If the underlying device supports unmap and the mount is read-write, initialize */
		int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
		void *extents = hfs_malloc(alloc_count * sizeof(dk_extent_t));
		trimlist.extents = (dk_extent_t*)extents;
		trimlist.allocated_count = alloc_count;
		trimlist.extent_count = 0;
	}

	/* Scan the whole bitmap; hfs_alloc_scan_range advances blocks_scanned. */
	while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){

		error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);

		if (error) {
			printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
			break;
		}
	}

	if ((hfsmp->hfs_flags & HFS_UNMAP) &&
	    ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
		/* Push out any extents still batched, then release the list. */
		if (error == 0) {
			hfs_issue_unmap(hfsmp, &trimlist);
		}
		if (trimlist.extents) {
			hfs_free(trimlist.extents, trimlist.allocated_count * sizeof(dk_extent_t));
		}
	}

	/*
	 * This is in an #if block because hfs_validate_summary prototype and function body
	 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
	 */
#if ALLOC_DEBUG
	sanity_check_free_ext(hfsmp, 1);
	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
		/* Validate the summary table too! */
		hfs_validate_summary(hfsmp);
		printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
	}
#endif

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
	}

	return error;
}
891
892 static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start,
893 uint32_t count, int list,
894 struct rl_entry **reservation)
895 {
896 struct rl_entry *range, *next_range;
897
898 if (list == HFS_TENTATIVE_BLOCKS) {
899 int nranges = 0;
900 // Don't allow more than 4 tentative reservations
901 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
902 rl_link, next_range) {
903 if (++nranges > 3)
904 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
905 }
906 }
907
908 range = hfs_malloc(sizeof(*range));
909 range->rl_start = start;
910 range->rl_end = start + count - 1;
911 TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
912 *reservation = range;
913 }
914
/*
 * Release one reserved range (tentative or locked) back to the volume:
 * remove it from the reservation list, return its blocks to the summary
 * table and free-extent cache, and mark the entry empty so a second call
 * is a no-op.  The rl_entry itself is NOT freed here — the holder of the
 * reservation pointer frees it (see hfs_free_locked_internal).
 */
static void hfs_release_reserved(hfsmount_t *hfsmp,
								 struct rl_entry *range,
								 int list)
{
	/* rl_start == -1 marks an already-released entry. */
	if (range->rl_start == -1)
		return;

	TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);

	if (rl_len(range) > 0) {
		if (list == HFS_TENTATIVE_BLOCKS)
			hfsmp->tentativeBlocks -= rl_len(range);
		else {
			/*
			 * We don't need to unmap tentative blocks because we won't have
			 * written to them, but we might have written to reserved blocks.
			 * Nothing can refer to those blocks so this doesn't have to be
			 * via the journal. If this proves to be too expensive, we could
			 * consider not sending down the unmap or we could require this
			 * to always be called within a transaction and then we can use
			 * the journal.
			 */
			dk_extent_t extent = {
				.offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize)
						   + hfsmp->hfsPlusIOPosOffset),
				.length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize)
			};
			dk_unmap_t unmap = {
				.extents = &extent,
				.extentsCount = 1,
			};
			/* Best effort: the unmap ioctl's result is deliberately ignored. */
			VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap,
					   0, vfs_context_kernel());
			hfs_assert(hfsmp->lockedBlocks >= rl_len(range));
			hfsmp->lockedBlocks -= rl_len(range);
		}
		hfs_release_summary(hfsmp, range->rl_start, rl_len(range));
		add_free_extent_cache(hfsmp, range->rl_start, rl_len(range));
	}

	/* Mark empty: rl_len() becomes 0 and the guard above short-circuits. */
	range->rl_start = -1;
	range->rl_end = -2;
}
958
959 static void hfs_free_locked_internal(hfsmount_t *hfsmp,
960 struct rl_entry **reservation,
961 int list)
962 {
963 if (*reservation) {
964 hfs_release_reserved(hfsmp, *reservation, list);
965 hfs_free(*reservation, sizeof(**reservation));
966 *reservation = NULL;
967 }
968 }
969
/* Release and free a tentative reservation; *reservation is set to NULL. */
void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
{
	hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
}
974
/* Release and free a locked reservation; *reservation is set to NULL. */
void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
{
	hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
}
979
980 OSErr BlockAllocate (
981 hfsmount_t *hfsmp, /* which volume to allocate space on */
982 u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
983 u_int32_t minBlocks, /* desired number of blocks to allocate */
984 u_int32_t maxBlocks, /* maximum number of blocks to allocate */
985 hfs_block_alloc_flags_t flags, /* option flags */
986 u_int32_t *actualStartBlock, /* actual first block of allocation */
987 u_int32_t *actualNumBlocks)
988 {
989 hfs_alloc_extra_args_t extra_args = {
990 .max_blocks = maxBlocks
991 };
992
993 HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
994
995 OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
996
997 *actualStartBlock = extent.startBlock;
998 *actualNumBlocks = extent.blockCount;
999
1000 return err;
1001 }
1002
/*
 * VFS-facing allocation entry point: identical to hfs_block_alloc_int()
 * except the classic Mac OS error code is mapped to an errno_t.
 */
errno_t hfs_block_alloc(hfsmount_t *hfsmp,
						HFSPlusExtentDescriptor *extent,
						hfs_block_alloc_flags_t flags,
						hfs_alloc_extra_args_t *ap)
{
	return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
}
1010
1011 /*
1012 ;________________________________________________________________________________
1013 ;
1014 ; Routine: hfs_block_alloc_int
1015 ;
1016 ; Function: Allocate space on a volume. If contiguous allocation is requested,
1017 ; at least the requested number of bytes will be allocated or an
1018 ; error will be returned. If contiguous allocation is not forced,
1019 ; the space will be allocated with the first largest extent available
1020 ; at the requested starting allocation block. If there is not enough
1021 ; room there, a block allocation of less than the requested size will be
1022 ; allocated.
1023 ;
1024 ; If the requested starting block is 0 (for new file allocations),
1025 ; the volume's allocation block pointer will be used as a starting
1026 ; point.
1027 ;
1028 ; Input Arguments:
1029 ; hfsmp - Pointer to the HFS mount structure.
1030 ; extent - startBlock indicates the block to start
1031 ; searching from and blockCount is the number of
1032 ; blocks required. Depending on the flags used,
1033 ; more or less blocks may be returned. The
1034 ; allocated extent is returned via this
1035 ; parameter.
1036 ; flags - Flags to specify options like contiguous, use
1037 ; metadata zone, skip free block check, etc.
1038 ; ap - Additional arguments used depending on flags.
1039 ; See hfs_alloc_extra_args_t and below.
1040 ;
1041 ; Output:
1042 ; (result) - Error code, zero for successful allocation
1043 ; extent - If successful, the allocated extent.
1044 ;
1045 ; Side effects:
1046 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1047 ;
1048 ; HFS_ALLOC_TENTATIVE
1049 ; Blocks will be reserved but not marked allocated. They can be
1050 ; stolen if free space is limited. Tentative blocks can be used by
;	passing HFS_ALLOC_USE_TENTATIVE and passing in the reservation.
1052 ; @ap->reservation_out is used to store the reservation.
1053 ;
1054 ; HFS_ALLOC_USE_TENTATIVE
1055 ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
1056 ; @ap->reservation_in should be set to whatever @ap->reservation_out
1057 ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative
1058 ; reservation was stolen, a normal allocation will take place.
1059 ;
1060 ; HFS_ALLOC_LOCKED
1061 ; Blocks will be reserved but not marked allocated. Unlike tentative
1062 ; reservations they cannot be stolen. It is safe to write to these
1063 ; blocks. @ap->reservation_out is used to store the reservation.
1064 ;
1065 ; HFS_ALLOC_COMMIT
1066 ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
1067 ; mark them allocated on disk. @ap->reservation_in is used.
1068 ;
1069 ; HFS_ALLOC_ROLL_BACK
1070 ; Take blocks that were just recently deallocated and mark them
1071 ; allocated. This is for roll back situations. Blocks got
1072 ; deallocated and then something went wrong and we need to roll back
1073 ; by marking the blocks allocated.
1074 ;
1075 ; HFS_ALLOC_FORCECONTIG
1076 ; It will not return fewer than @min_blocks.
1077 ;
1078 ; HFS_ALLOC_TRY_HARD
1079 ; We will perform an exhaustive search to try and find @max_blocks.
1080 ; It will not return fewer than @min_blocks.
1081 ;
1082 ;________________________________________________________________________________
1083 */
/* See the flag-by-flag description in the block comment above. */
OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
						  HFSPlusExtentDescriptor *extent,
						  hfs_block_alloc_flags_t flags,
						  hfs_alloc_extra_args_t *ap)
{
	u_int32_t freeBlocks;
	OSErr err = 0;
	Boolean updateAllocPtr = false;		// true if nextAllocation needs to be updated
	Boolean forceContiguous = false;
	Boolean forceFlush;

	uint32_t startingBlock = extent->startBlock;
	uint32_t minBlocks = extent->blockCount;
	// If the caller didn't supply max_blocks, min is also the max.
	uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);

	// Committing a locked reservation: the extent comes straight from the
	// reservation; no bitmap search is needed.
	if (ISSET(flags, HFS_ALLOC_COMMIT)) {
		extent->startBlock = (*ap->reservation_in)->rl_start;
		extent->blockCount = rl_len(*ap->reservation_in);
		goto mark_allocated;
	}

	// Rolling back a recent deallocation: caller supplied the extent; just
	// re-mark it allocated.
	if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
		goto mark_allocated;

	freeBlocks = hfs_freeblks(hfsmp, 0);

	if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
		struct rl_entry *range = *ap->reservation_in;

		if (range && range->rl_start != -1) {
			/*
			 * It's possible that we have a tentative reservation
			 * but there aren't enough free blocks due to loaned blocks
			 * or insufficient space in the backing store.
			 */
			uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks);

			if (count >= minBlocks) {
				extent->startBlock = range->rl_start;
				extent->blockCount = count;

				// Should we go straight to commit?
				if (!ISSET(flags, HFS_ALLOC_LOCKED))
					SET(flags, HFS_ALLOC_COMMIT);

				goto mark_allocated;
			}
		}

		/*
		 * We can't use the tentative reservation so free it and allocate
		 * normally.
		 */
		hfs_free_tentative(hfsmp, ap->reservation_in);
		CLR(flags, HFS_ALLOC_USE_TENTATIVE);
	}

	// TRY_HARD implies contiguity for the initial pass (see hfs_alloc_try_hard).
	if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
		forceContiguous = true;

	if (flags & HFS_ALLOC_FLUSHTXN) {
		forceFlush = true;
	}
	else {
		forceFlush = false;
	}

	hfs_assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);

	// See if we have to steal tentative blocks
	if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
		SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);

	/* Skip free block check if blocks are being allocated for relocating
	 * data during truncating a volume.
	 *
	 * During hfs_truncatefs(), the volume free block count is updated
	 * before relocating data to reflect the total number of free blocks
	 * that will exist on the volume after resize is successful. This
	 * means that we have reserved allocation blocks required for relocating
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in
	 * an extent being relocated is more than the free blocks that will
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		// If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
		err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
		if (err)
			goto exit;

		goto mark_allocated;
	}

	//
	// If caller didn't specify a starting block number, then use the volume's
	// next block to allocate from.
	//
	if (startingBlock == 0) {
		hfs_lock_mount (hfsmp);

		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = hfsmp->sparseAllocation;
		}
		else {
			startingBlock = hfsmp->nextAllocation;
		}
		hfs_unlock_mount(hfsmp);
		updateAllocPtr = true;
	}


	if (startingBlock >= hfsmp->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	// If the request must be contiguous, then find a sequence of free blocks
	// that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
							  flags, &extent->startBlock, &extent->blockCount);
		/*
		 * If we allocated from a new position then also update the roving allocator.
		 * This will keep the roving allocation pointer up-to-date even
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found
		 * the block to vend out.
		 */
		if ((err == noErr) &&
			(extent->startBlock > startingBlock) &&
			((extent->startBlock < hfsmp->hfs_metazone_start) ||
			 (extent->startBlock > hfsmp->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison
		 * the tree, then we will reset the free-extent cache and start over.
		 */

		/* Disable HFS_ALLOC_FLUSHTXN if needed */
		if (forceFlush) {
			flags &= ~HFS_ALLOC_FLUSHTXN;
		}

		/*
		 * BlockFindKnown only examines the free extent cache; anything in there will
		 * have been committed to stable storage already.
		 */
		err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
							 &extent->blockCount);

		/* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/*
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.  We 'trust' the summary bitmap in this call, if it tells us
			 * that it could not find any free space.
			 */
			err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
							   maxBlocks, flags, true,
							   &extent->startBlock, &extent->blockCount);
		}
		if (err == dskFulErr) {
			/*
			 * Vary the behavior here if the summary table is on or off.
			 * If it is on, then we don't trust it if we get into this case and
			 * basically do a full scan for maximum coverage.
			 * If it is off, then we trust the above and go up until the startingBlock.
			 */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
								   flags, false,
								   &extent->startBlock, &extent->blockCount);
			}
			else {
				err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
								   flags, false,
								   &extent->startBlock, &extent->blockCount);
			}

			/*
			 * Last Resort: Find/use blocks that may require a journal flush.
			 */
			if (err == dskFulErr && forceFlush) {
				flags |= HFS_ALLOC_FLUSHTXN;
				err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
								   flags, false,
								   &extent->startBlock, &extent->blockCount);
			}
		}
	}

	if (err)
		goto exit;

mark_allocated:

	// Handle alignment
	if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
		/*
		 * See the comment in FileMgrInternal.h for alignment
		 * semantics.
		 */
		uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
							 % ap->alignment);

		// @minBlocks is still the minimum
		if (extent->blockCount >= minBlocks + rounding)
			extent->blockCount -= rounding;
	}

	err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
									 extent->blockCount, flags);

	if (err)
		goto exit;

	// On CoreStorage-backed volumes, tell the backing store about the
	// allocation (pin to fast device, or map the extent).
	if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0
		&& !ISSET(flags, HFS_ALLOC_TENTATIVE)) {
		if (ISSET(flags, HFS_ALLOC_FAST_DEV)) {
#if !HFS_ALLOC_TEST /* need this guard because this file is compiled outside of the kernel */
			hfs_pin_block_range(hfsmp, HFS_PIN_IT,
								extent->startBlock, extent->blockCount);
#endif
		} else {
			_dk_cs_map_t cm = {
				.cm_extent = {
					(hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize)
					 + hfsmp->hfsPlusIOPosOffset),
					hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize)
				}
			};

			errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP,
									  (caddr_t)&cm, 0, vfs_context_current());

			/*
			 * Ignore errors for now; we are fully provisioned so in
			 * theory CoreStorage should be able to handle this
			 * allocation.  Should we want to change this in future, then
			 * we should think carefully how we handle errors.  Allowing
			 * CoreStorage to truncate our allocation is problematic
			 * because we might have minimum and alignment requirements
			 * and backing out changes we have already made is
			 * non-trivial.
			 */

			if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) {
				printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n",
					   err2, cm.cm_bytes_mapped);
			}
		}
	}

	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (extent->blockCount != 0) {
		//
		// If we used the volume's roving allocation pointer, then we need to update it.
		// Adding in the length of the current allocation might reduce the next allocate
		// call by avoiding a re-scan of the already allocated space.  However, the clump
		// just allocated can quite conceivably end up being truncated or released when
		// the file is closed or its EOF changed.  Leaving the allocation pointer at the
		// start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		hfs_lock_mount (hfsmp);

		if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
			if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
				hfsmp->sparseAllocation = extent->startBlock;
			}
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (extent->blockCount < hfsmp->hfs_freed_block_count) {
				hfsmp->hfs_freed_block_count -= extent->blockCount;
			} else {
				hfsmp->hfs_freed_block_count = 0;
			}

			// Only move the roving pointer when we allocated outside the
			// metadata zone.
			if (updateAllocPtr &&
				((extent->startBlock < hfsmp->hfs_metazone_start) ||
				 (extent->startBlock > hfsmp->hfs_metazone_end))) {
				HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
			}

			(void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
		}

		if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
			// Consume the head of the tentative reservation; free it once
			// it is fully used up.
			(*ap->reservation_in)->rl_start += extent->blockCount;
			hfsmp->tentativeBlocks -= extent->blockCount;
			if (rl_len(*ap->reservation_in) <= 0)
				hfs_free_tentative(hfsmp, ap->reservation_in);
		} else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
			// Handle committing locked extents
			hfs_assert(hfsmp->lockedBlocks >= extent->blockCount);
			(*ap->reservation_in)->rl_start += extent->blockCount;
			hfsmp->lockedBlocks -= extent->blockCount;
			hfs_free_locked(hfsmp, ap->reservation_in);
		}

		/*
		 * Update the number of free blocks on the volume
		 *
		 * Skip updating the free blocks count if the blocks are
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */

		if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
			hfsmp->tentativeBlocks += extent->blockCount;
		} else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
			hfsmp->lockedBlocks += extent->blockCount;
		} else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			hfsmp->freeBlocks -= extent->blockCount;
		}
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);

		hfs_generate_volume_notifications(hfsmp);

		if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
			hfs_assert(ap);
			add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
								 0, ap->reservation_out);	// 0 == HFS_TENTATIVE_BLOCKS list
		} else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
			hfs_assert(ap);
			add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
								 1, ap->reservation_out);	// 1 == HFS_LOCKED_BLOCKS list
		}

		if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
			/*
			 * See if we used tentative blocks.  Note that we cannot
			 * free the reservations here because we don't have access
			 * to the external pointers.  All we can do is update the
			 * reservations and they'll be cleaned up when whatever is
			 * holding the pointers calls us back.
			 *
			 * We use the rangelist code to detect overlaps and
			 * constrain the tentative block allocation.  Note that
			 * @end is inclusive so that our rangelist code will
			 * resolve the various cases for us.  As a result, we need
			 * to ensure that we account for it properly when removing
			 * the blocks from the tentative count in the mount point
			 * and re-inserting the remainder (either head or tail)
			 */
			struct rl_entry *range, *next_range;
			struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
			const uint32_t start = extent->startBlock;
			const uint32_t end = start + extent->blockCount - 1;
			TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
				switch (rl_overlap(range, start, end)) {
				case RL_OVERLAPCONTAINSRANGE:
					// Keep the bigger part
					if (start - range->rl_start > range->rl_end - end) {
						// Discard the tail
						hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
						hfs_release_summary(hfsmp, end + 1, range->rl_end - end);
						const uint32_t old_end = range->rl_end;
						range->rl_end = start - 1;
						add_free_extent_cache(hfsmp, end + 1, old_end - end);
					} else {
						// Discard the head
						hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
						hfs_release_summary(hfsmp, range->rl_start,
											start - range->rl_start);
						const uint32_t old_start = range->rl_start;
						range->rl_start = end + 1;
						add_free_extent_cache(hfsmp, old_start,
											  start - old_start);
					}
					hfs_assert(range->rl_end >= range->rl_start);
					break;
				case RL_MATCHINGOVERLAP:
				case RL_OVERLAPISCONTAINED:
					// Entire reservation consumed: empty it and release it.
					hfsmp->tentativeBlocks -= rl_len(range);
					range->rl_end = range->rl_start - 1;
					hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
					break;
				case RL_OVERLAPSTARTSBEFORE:
					// Trim the reservation's tail.
					hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
					range->rl_end = start - 1;
					hfs_assert(range->rl_end >= range->rl_start);
					break;
				case RL_OVERLAPENDSAFTER:
					// Trim the reservation's head.
					hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
					range->rl_start = end + 1;
					hfs_assert(range->rl_end >= range->rl_start);
					break;
				case RL_NOOVERLAP:
					break;
				}
			}
		}
	}

exit:

	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (extent->startBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (extent->startBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}

			if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending too many invalid blocks!");
			}

			if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}

	if (err) {
		// Just to be safe...
		extent->startBlock = 0;
		extent->blockCount = 0;
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0);

	return err;
}
1551
1552
1553 /*
1554 ;________________________________________________________________________________
1555 ;
1556 ; Routine: BlockDeallocate
1557 ;
1558 ; Function: Update the bitmap to deallocate a run of disk allocation blocks
1559 ;
1560 ; Input Arguments:
1561 ; vcb - Pointer to ExtendedVCB for the volume to free space on
1562 ; firstBlock - First allocation block to be freed
1563 ; numBlocks - Number of allocation blocks to free up (must be > 0!)
1564 ;
1565 ; Output:
1566 ; (result) - Result code
1567 ;
1568 ; Side effects:
1569 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1570 ; The Allocator's red-black trees may also be modified as a result.
1571 ;
1572 ;________________________________________________________________________________
1573 */
1574
1575 OSErr BlockDeallocate (
1576 ExtendedVCB *vcb, // Which volume to deallocate space on
1577 u_int32_t firstBlock, // First block in range to deallocate
1578 u_int32_t numBlocks, // Number of contiguous blocks to deallocate
1579 hfs_block_alloc_flags_t flags)
1580 {
1581 if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
1582 return 0;
1583
1584 OSErr err;
1585 struct hfsmount *hfsmp;
1586 hfsmp = VCBTOHFS(vcb);
1587
1588 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1589 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
1590
1591 //
1592 // If no blocks to deallocate, then exit early
1593 //
1594 if (numBlocks == 0) {
1595 err = noErr;
1596 goto Exit;
1597 }
1598
1599
1600 if (ALLOC_DEBUG) {
1601 if (firstBlock >= hfsmp->totalBlocks) {
1602 panic ("BlockDeallocate: freeing invalid blocks!");
1603 }
1604
1605 if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
1606 panic ("BlockDeallocate: freeing too many invalid blocks!");
1607 }
1608 }
1609
1610 /*
1611 * If we're using the summary bitmap, then try to mark the bits
1612 * as potentially usable/free before actually deallocating them.
1613 * It is better to be slightly speculative here for correctness.
1614 */
1615
1616 (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
1617
1618 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1619
1620 if (err) {
1621 goto Exit;
1622 }
1623
1624 //
1625 // Update the volume's free block count, and mark the VCB as dirty.
1626 //
1627 hfs_lock_mount(hfsmp);
1628 /*
1629 * Do not update the free block count. This flags is specified
1630 * when a volume is being truncated.
1631 */
1632 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1633 vcb->freeBlocks += numBlocks;
1634 }
1635
1636 vcb->hfs_freed_block_count += numBlocks;
1637
1638 if (vcb->nextAllocation == (firstBlock + numBlocks)) {
1639 HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
1640 }
1641
1642 if (hfsmp->jnl == NULL) {
1643 /*
1644 * In the journal case, we'll add the free extent once the journal
1645 * calls us back to tell us it wrote the transaction to disk.
1646 */
1647 (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
1648
1649 /*
1650 * If the journal case, we'll only update sparseAllocation once the
1651 * free extent cache becomes empty (when we remove the last entry
1652 * from the cache). Skipping it here means we're less likely to
1653 * find a recently freed extent via the bitmap before it gets added
1654 * to the free extent cache.
1655 */
1656 if (firstBlock < vcb->sparseAllocation) {
1657 vcb->sparseAllocation = firstBlock;
1658 }
1659 }
1660
1661 MarkVCBDirty(vcb);
1662 hfs_unlock_mount(hfsmp);
1663
1664 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1665 Exit:
1666
1667 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1668 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
1669
1670 return err;
1671 }
1672
1673
/*
 * Number of zero (i.e. free) bits in each possible 4-bit nibble value;
 * used by MetaZoneFreeBlocks to count free allocation blocks a nibble at
 * a time (nibble 0x0 has 4 free bits, 0xF has none).
 */
u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
1678
/*
 * Count the free allocation blocks inside the metadata allocation zone by
 * reading the bitmap directly and summing zero bits a nibble at a time
 * via the freebitcount table.  Returns 0 if a bitmap block cannot be read.
 */
u_int32_t
MetaZoneFreeBlocks(ExtendedVCB *vcb)
{
	u_int32_t freeblocks;
	u_int32_t *currCache;
	uintptr_t blockRef;
	u_int32_t bit;
	u_int32_t lastbit;
	int bytesleft;
	int bytesperblock;
	u_int8_t byte;
	u_int8_t *buffer;

	blockRef = 0;
	bytesleft = freeblocks = 0;
	buffer = NULL;
	bit = VCBTOHFS(vcb)->hfs_metazone_start;
	// NOTE(review): a zone starting at block 1 is rounded down to 0,
	// presumably so the byte-at-a-time walk starts on a byte boundary —
	// confirm against how hfs_metazone_start is initialized.
	if (bit == 1)
		bit = 0;

	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
	bytesperblock = vcb->vcbVBMIOSize;

	/*
	 * Count all the bits from bit to lastbit.
	 */
	while (bit < lastbit) {
		/*
		 * Get next bitmap block.
		 */
		if (bytesleft == 0) {
			if (blockRef) {
				(void) ReleaseBitmapBlock(vcb, blockRef, false);
				blockRef = 0;
			}
			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef,
								HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
				return (0);
			}
			buffer = (u_int8_t *)currCache;
			bytesleft = bytesperblock;
		}
		// One byte covers 8 allocation blocks; count zero bits per nibble.
		byte = *buffer++;
		freeblocks += freebitcount[byte & 0x0F];
		freeblocks += freebitcount[(byte >> 4) & 0x0F];
		bit += kBitsPerByte;
		--bytesleft;
	}
	if (blockRef)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	return (freeblocks);
}
1732
1733
1734 /*
1735 * Obtain the next allocation block (bit) that's
1736 * outside the metadata allocation zone.
1737 */
1738 static u_int32_t NextBitmapBlock(
1739 ExtendedVCB *vcb,
1740 u_int32_t bit)
1741 {
1742 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1743
1744 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1745 return (bit);
1746 /*
1747 * Skip over metadata allocation zone.
1748 */
1749 if ((bit >= hfsmp->hfs_metazone_start) &&
1750 (bit <= hfsmp->hfs_metazone_end)) {
1751 bit = hfsmp->hfs_metazone_end + 1;
1752 }
1753 return (bit);
1754 }
1755
1756
// Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes.
// Sets bits [start, end) — end is exclusive — in a big-endian, MSB-first
// bitmap (bit 0 is the most significant bit of the first byte, matching
// the on-disk HFS bitmap layout).
static void bits_set(void *bitmap, int start, int end)
{
	const int start_bit = start & 63;
	const int end_bit = end & 63;

// Masks are built in host order, then byte-swapped to big-endian so the
// MSB-first bit numbering holds on little-endian hosts.
// LEFT_MASK(b): the first b bits of a word; RIGHT_MASK(b): all but the
// first b bits.  LEFT_MASK is never invoked with b == 0 (the shift by 64
// would be undefined); both call sites guard against it.
#define LEFT_MASK(bit)	OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
#define RIGHT_MASK(bit)	OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)

	uint64_t *p = (uint64_t *)bitmap + start / 64;

	if ((start & ~63) == (end & ~63)) {
		// Start and end in same 64 bits
		*p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
	} else {
		// Head word: set from start_bit to the end of the word.
		*p++ |= RIGHT_MASK(start_bit);

		// Number of whole 64-bit words strictly between head and tail.
		int nquads = (end - end_bit - start - 1) / 64;

		while (nquads--)
			*p++ = 0xffffffffffffffffull;

		// Tail word: set its leading end_bit bits (none if end is aligned).
		if (end_bit)
			*p |= LEFT_MASK(end_bit);
	}
}
1783
// Modifies the buffer and applies any reservations that we might have.
// Overlays the in-memory reserved ranges (tentative and/or locked) onto the
// bitmap data in @bp so callers see reserved blocks as allocated.  May
// replace @bp with a shadow copy; the (possibly new) buffer is returned.
// @offset is the bitmap *bit* index of the first bit covered by @bp.
static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset,
								  hfs_block_alloc_flags_t flags,
								  bool always_copy)
{
	bool taken_copy = false;
	void *buffer = (void *)buf_dataptr(bp);
	const uint32_t nbytes = buf_count(bp);
	// Last bitmap bit covered by this buffer (inclusive).
	const off_t end = offset + nbytes * 8 - 1;

	// Walk the reservation lists: tentative first (unless the caller asked
	// to ignore tentative blocks), then locked.
	for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
				  ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
		struct rl_entry *entry;
		TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
			uint32_t a, b;

			enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);

			if (overlap_type == RL_NOOVERLAP)
				continue;

			/*
			 * If always_copy is false, we only take a copy if B_LOCKED is
			 * set because ReleaseScanBitmapRange doesn't invalidate the
			 * buffer in that case.
			 */
			if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
				buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
				buf_brelse(bp);
				bp = new_bp;
				buf_setflags(bp, B_NOCACHE);
				buffer = (void *)buf_dataptr(bp);
				taken_copy = true;
			}

			// Clamp the overlap to [offset, end] and express it as the
			// inclusive bit range [a, b] whose bits we will set.
			switch (overlap_type) {
			case RL_OVERLAPCONTAINSRANGE:
			case RL_MATCHINGOVERLAP:
				// Reservation covers the whole buffer: set everything
				// and stop -- nothing further can change the result.
				memset(buffer, 0xff, nbytes);
				return bp;
			case RL_OVERLAPISCONTAINED:
				a = entry->rl_start;
				b = entry->rl_end;
				break;
			case RL_OVERLAPSTARTSBEFORE:
				a = offset;
				b = entry->rl_end;
				break;
			case RL_OVERLAPENDSAFTER:
				a = entry->rl_start;
				b = end;
				break;
			case RL_NOOVERLAP:
				__builtin_unreachable();
			}

			// Make the range buffer-relative.
			a -= offset;
			b -= offset;

			hfs_assert(a < buf_count(bp) * 8);
			hfs_assert(b < buf_count(bp) * 8);
			hfs_assert(b >= a);

			// b is inclusive
			bits_set(buffer, a, b + 1);
		}
	} // for (;;)

	return bp;
}
1854
/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;			block (bit).  Return a pointer to the bitmap block.
;
; Inputs:
;	vcb			-- Pointer to ExtendedVCB
;	bit			-- Allocation block whose bitmap block is desired
;	flags		-- HFS_ALLOC_IGNORE_RESERVED skips overlaying in-memory
;				   reservations onto the returned data
;
; Outputs:
;	buffer		-- Pointer to bitmap block corresonding to "block"
;	blockRef	-- Opaque reference for a later ReleaseBitmapBlock
;_______________________________________________________________________
*/
static OSErr ReadBitmapBlock(ExtendedVCB *vcb,
							 u_int32_t bit,
							 u_int32_t **buffer,
							 uintptr_t *blockRef,
							 hfs_block_alloc_flags_t flags)
{
	OSErr err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	// Convert the allocation block number into a bitmap-file logical
	// block number (each bitmap block holds blockSize * 8 bits).
	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	/* HFS+ / HFSX */
	if (vcb->vcbSigWord != kHFSSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */
	}
#if CONFIG_HFS_STD
	else {
		/* HFS Standard */
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}
#endif

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
				// Overlay tentative/locked reservations so callers see
				// those blocks as allocated (may swap bp for a shadow copy).
				bp = process_reservations(vcb, bp, block * blockSize * 8,
										  flags, /* always_copy: */ true);
			}

			// Stash the read flags so ReleaseBitmapBlock can detect a
			// dirty release of a reservation-processed (read-only) buffer.
			buf_setfsprivate(bp, (void *)(uintptr_t)flags);

			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	} else
		hfs_assert(err);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
1933
1934
/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapRange
;
; Function:	Read in a range of the bitmap starting at the given offset.
;			Use the supplied size to determine the amount of I/O to generate
;			against the bitmap file.  Return a pointer to the bitmap block.
;			HFS+ only; returns EINVAL for HFS Standard volumes.
;
; Inputs:
;	hfsmp		-- Pointer to hfs mount
;	offset		-- byte offset into the bitmap file
;	iosize		-- How much I/O to generate against the bitmap file.
;
; Outputs:
;	buffer		-- Pointer to bitmap block data corresonding to "block"
;	blockRef	-- struct 'buf' pointer which MUST be released in a subsequent call.
;_______________________________________________________________________
*/
static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
		uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
{

	OSErr err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;

	/* This function isn't supported for HFS standard */
	if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
		return EINVAL;
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
	}

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);

	vp = hfsmp->hfs_allocation_vp;	/* use allocation file vnode */

	/*
	 * The byte offset argument must be converted into bitmap-relative logical
	 * block numbers before using it in buf_meta_bread.
	 *
	 * buf_meta_bread (and the things it calls) will eventually try to
	 * reconstruct the byte offset into the file by multiplying the logical
	 * block number passed in below by the vcbVBMIOSize field in the mount
	 * point.  So we prepare for that by converting the byte offset back into
	 * logical blocks in terms of VBMIOSize units.
	 *
	 * The amount of I/O requested and the byte offset should be computed
	 * based on the helper function in the frame that called us, so we can
	 * get away with just doing a simple divide here.
	 */
	block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);

	err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			// flags == 0: overlay both tentative and locked reservations.
			// always_copy is false: a shadow copy is made only when
			// B_LOCKED is set, since ReleaseScanBitmapRange invalidates
			// the buffer in the unlocked case anyway.
			bp = process_reservations(hfsmp, bp, (offset * 8), 0,
									  /* always_copy: */ false);

			*blockRef = bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
	}

	return err;
}
2017
2018
/*
;_______________________________________________________________________
;
; Routine:	ReleaseBitmapBlock
;
; Function:	Release a bitmap block obtained from ReadBitmapBlock,
;			writing it back (journalled when a journal is active)
;			if it was modified.
;
; Inputs:
;	vcb			-- Pointer to ExtendedVCB
;	blockRef	-- Opaque reference returned by ReadBitmapBlock
;	dirty		-- True if the caller modified the block
;_______________________________________________________________________
*/
static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty)
{
	struct buf *bp = (struct buf *)blockRef;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);

	if (blockRef == 0) {
		// Releasing a null reference is a no-op -- but a *dirty* null
		// reference means a caller modified a buffer we never handed out.
		if (dirty)
			panic("hfs: ReleaseBitmapBlock: missing bp");
		return (0);
	}

	if (bp) {
		if (dirty) {
			// ReadBitmapBlock stashed the read flags in fsprivate.  A buffer
			// read *without* HFS_ALLOC_IGNORE_RESERVED may have had in-memory
			// reservations overlaid (possibly via a shadow copy), so writing
			// it back would corrupt the on-disk bitmap.
			hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp);

			if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
				panic("Modified read-only bitmap buffer!");

			struct hfsmount *hfsmp = VCBTOHFS(vcb);

			// Journalled volumes end the journal transaction on the block;
			// otherwise fall back to a delayed write.
			if (hfsmp->jnl) {
				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
			} else {
				buf_bdwrite(bp);
			}
		} else {
			buf_brelse(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return (0);
}
2072
2073 /*
2074 * ReleaseScanBitmapRange
2075 *
2076 * This is used to release struct bufs that were created for use by
2077 * bitmap scanning code. Because they may be of sizes different than the
2078 * typical runtime manipulation code, we want to force them to be purged out
2079 * of the buffer cache ASAP, so we'll release them differently than in the
2080 * ReleaseBitmapBlock case.
2081 *
2082 * Additionally, because we know that we're only reading the blocks and that they
2083 * should have been clean prior to reading them, we will never
2084 * issue a write to them (thus dirtying them).
2085 */
2086
2087 static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
2088
2089 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2090 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, 0, 0, 0, 0, 0);
2091 }
2092
2093 if (bp) {
2094 /* Mark the buffer invalid if it isn't locked, then release it */
2095 if ((buf_flags(bp) & B_LOCKED) == 0) {
2096 buf_markinvalid(bp);
2097 }
2098 buf_brelse(bp);
2099 }
2100
2101 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2102 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2103 }
2104
2105 return (0);
2106 }
2107
2108 /*
2109 * @extent.startBlock, on input, contains a preferred block for the
2110 * allocation. @extent.blockCount, on input, contains the minimum
2111 * number of blocks acceptable. Upon success, the result is conveyed
2112 * in @extent.
2113 */
2114 static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
2115 HFSPlusExtentDescriptor *extent,
2116 uint32_t max_blocks,
2117 hfs_block_alloc_flags_t flags)
2118 {
2119 OSErr err = dskFulErr;
2120
2121 const uint32_t min_blocks = extent->blockCount;
2122
2123 // It's > rather than >= because the last block is always reserved
2124 if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
2125 && hfsmp->allocLimit - extent->startBlock > max_blocks) {
2126 /*
2127 * This is just checking to see if there's an extent starting
2128 * at extent->startBlock that will suit. We only check for
2129 * @max_blocks here; @min_blocks is ignored.
2130 */
2131
2132 err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
2133 max_blocks, max_blocks, true, true,
2134 &extent->startBlock, &extent->blockCount, flags);
2135
2136 if (err != dskFulErr)
2137 return err;
2138 }
2139
2140 err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
2141 &extent->blockCount);
2142
2143 if (!err) {
2144 if (extent->blockCount >= max_blocks)
2145 return 0;
2146 } else if (err != dskFulErr)
2147 return err;
2148
2149 // Try a more exhaustive search
2150 return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
2151 min_blocks, max_blocks,
2152 /* useMetaZone: */ true,
2153 /* trustSummary: */ true,
2154 &extent->startBlock, &extent->blockCount, flags);
2155 }
2156
/*
_______________________________________________________________________

Routine:	BlockFindContig

Function:	Find a contiguous group of allocation blocks.  If the
			minimum cannot be satisfied, nothing is returned.  The
			caller guarantees that there are enough free blocks
			(though they may not be contiguous, in which case this
			call will fail).

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block for allocation
	minBlocks		Minimum number of contiguous blocks to allocate
	maxBlocks		Maximum number of contiguous blocks to allocate
	flags			HFS_ALLOC_FLUSHTXN allows reusing recently deleted
					(unmapped-but-not-yet-TRIMmed) extents;
					HFS_ALLOC_METAZONE allows the metadata zone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/
static OSErr BlockFindContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	hfs_block_alloc_flags_t flags,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr retval = noErr;
	uint32_t currentStart = startingBlock;

	uint32_t foundStart = 0;	// values to emit to caller
	uint32_t foundCount = 0;

	uint32_t collision_start = 0;	// if we have to re-allocate a recently deleted extent, use this
	uint32_t collision_count = 0;

	int err;
	int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
	Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);

	int recently_deleted = 0;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	// Loop until we either keep an extent (foundStart/foundCount non-zero)
	// or the search fails (retval != noErr -> goto bailout inside).
	while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {

		/* Try and find something that works. */

		/*
		 * NOTE: If the only contiguous free extent of at least minBlocks
		 * crosses startingBlock (i.e. starts before, ends after), then we
		 * won't find it.  Earlier versions *did* find this case by letting
		 * the second search look past startingBlock by minBlocks.  But
		 * with the free extent cache, this can lead to duplicate entries
		 * in the cache, causing the same blocks to be allocated twice.
		 */
		retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
				maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);

		if (retval == dskFulErr && currentStart != 0) {
			/*
			 * We constrain the endingBlock so we don't bother looking for ranges
			 * that would overlap those found in the previous call, if the summary bitmap
			 * is not on for this volume.  If it is, then we assume that it was not trust
			 * -worthy and do a full scan.
			 */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
						maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
			}
			else {
				retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
						maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
			}
		}

		if (retval != noErr) {
			goto bailout;
		}

		/* Do we overlap with the recently found collision extent? */
		if (collision_start) {
			if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
				/*
				 * We've looped around, and the only thing we could use was the collision extent.
				 * Since we are allowed to use it, go ahead and do so now.
				 */
				if(allowReuse) {
					/*
					 * then we couldn't find anything except values which might have been
					 * recently deallocated.  just return our cached value if we are allowed to.
					 */
					foundStart = collision_start;
					foundCount = collision_count;
					goto bailout;
				}
				else {
					/* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
					retval = dskFulErr;
					goto bailout;
				}
			}
		}

		/* OK, we know we must not have collided.  See if this one is recently deleted */
		if (hfsmp->jnl) {
			recently_deleted = 0;
			uint32_t nextStart;
			err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
					(uint64_t) foundCount, &recently_deleted, &nextStart);
			if (err == 0) {
				if(recently_deleted != 0) {
					/*
					 * these blocks were recently deleted/deallocated.  Cache the extent, but
					 * but keep searching to see if we can find one that won't collide here.
					 */
					if (collision_start == 0) {
						collision_start = foundStart;
						collision_count = foundCount;
					}
					recently_deleted = 0;

					/*
					 * advance currentStart to the point just past the overlap we just found.  Note that
					 * we will automatically loop around to start of the bitmap as needed.
					 */
					currentStart = nextStart;
					/* Unset foundStart/Count to allow it to loop around again. */
					foundStart = 0;
					foundCount = 0;
				}
			}
		} // end jnl/deleted case

		/*
		 * If we found something good, we'd break out of the loop at the top; foundCount
		 * and foundStart should be set.
		 */

	} // end while loop.

bailout:

	// Publish results only on success; on failure the out-parameters are
	// not written by this routine.
	if (retval == noErr) {
		*actualStartBlock = foundStart;
		*actualNumBlocks = foundCount;
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0);

	return retval;

}
2318
2319
2320 /*
2321 _______________________________________________________________________
2322
2323 Routine: BlockFindAny
2324
2325 Function: Find one or more allocation blocks and may return fewer than
2326 requested. The caller guarantees that there is at least one
2327 free block.
2328
2329 Inputs:
2330 vcb Pointer to volume where space is to be allocated
2331 startingBlock Preferred first block for allocation
2332 endingBlock Last block to check + 1
2333 maxBlocks Maximum number of contiguous blocks to allocate
2334 useMetaZone
2335
2336 Outputs:
2337 actualStartBlock First block of range allocated, or 0 if error
2338 actualNumBlocks Number of blocks allocated, or 0 if error
2339 _______________________________________________________________________
2340 */
2341
2342 static OSErr BlockFindAny(
2343 ExtendedVCB *vcb,
2344 u_int32_t startingBlock,
2345 register u_int32_t endingBlock,
2346 u_int32_t maxBlocks,
2347 hfs_block_alloc_flags_t flags,
2348 Boolean trustSummary,
2349 u_int32_t *actualStartBlock,
2350 u_int32_t *actualNumBlocks)
2351 {
2352
2353 /*
2354 * If it is enabled, scan through the summary table to find the first free block.
2355 *
2356 * If it reports that there are not any free blocks, we could have a false
2357 * positive, so in that case, use the input arguments as a pass through.
2358 */
2359 uint32_t start_blk = startingBlock;
2360 uint32_t end_blk = endingBlock;
2361 struct hfsmount *hfsmp;
2362 OSErr err;
2363
2364 hfsmp = (struct hfsmount*)vcb;
2365 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2366 uint32_t suggested_start;
2367
2368 /*
2369 * If the summary table is enabled, scan through it to find the first free
2370 * block. If there was an error, or we couldn't find anything free in the
2371 * summary table, then just leave the start_blk fields unmodified. We wouldn't
2372 * have gotten to this point if the mount point made it look like there was possibly
2373 * free space in the FS.
2374 */
2375 err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
2376 if (err == 0) {
2377 start_blk = suggested_start;
2378 }
2379 else {
2380 /* Differentiate between ENOSPC and a more esoteric error in the above call. */
2381 if ((err == ENOSPC) && (trustSummary)) {
2382 /*
2383 * The 'trustSummary' argument is for doing a full scan if we really
2384 * really, need the space and we think it's somewhere but can't find it in the
2385 * summary table. If it's true, then we trust the summary table and return
2386 * dskFulErr if we couldn't find it above.
2387 */
2388 return dskFulErr;
2389 }
2390 /*
2391 * If either trustSummary was false or we got a different errno, then we
2392 * want to fall through to the real bitmap single i/o code...
2393 */
2394 }
2395 }
2396
2397 err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
2398 flags, actualStartBlock, actualNumBlocks);
2399
2400 return err;
2401 }
2402
2403
/*
 * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
 * figure out where the free allocation blocks are.  Inputs and
 * outputs are the same as for BlockFindAny.
 *
 * Two phases: (1) scan forward from startingBlock for the first clear
 * (free) bit; (2) extend that run until an allocated bit, endingBlock,
 * or maxBlocks is reached.
 */

static OSErr BlockFindAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	hfs_block_alloc_flags_t	flags,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr err;
	register u_int32_t block = 0;		// current block number
	register u_int32_t currentWord;		// Pointer to current word within bitmap block
	register u_int32_t bitMask;			// Word with given bits already set (ready to OR in)
	register u_int32_t wordsLeft;		// Number of words left in this bitmap block
	u_int32_t *buffer = NULL;
	u_int32_t *currCache = NULL;
	uintptr_t blockRef = 0;
	u_int32_t bitsPerBlock;
	u_int32_t wordsPerBlock;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
	Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);

restartSearchAny:

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else {
				err = dskFulErr;
				goto Exit;
			}
		}
	}

	// Since this routine doesn't wrap around
	if (maxBlocks > (endingBlock - startingBlock)) {
		maxBlocks = endingBlock - startingBlock;
	}

	//
	// Pre-read the first bitmap block
	//
	err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
	if (err != noErr) goto Exit;
	buffer = currCache;

	//
	// Set up the current position within the block
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		// Point buffer/bitMask at startingBlock's word and bit within
		// the bitmap block we just read.
		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		buffer += wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
		currentWord = SWAP_BE32 (*buffer);
		bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
	}

	/*
	 * While loop 1:
	 * Find the first unallocated block starting at 'block'
	 */
	uint32_t summary_block_scan = 0;

	block=startingBlock;
	while (block < endingBlock) {
		// A clear bit means a free block: stop here.
		if ((currentWord & bitMask) == 0)
			break;

		// Next bit
		++block;
		bitMask >>= 1;
		if (bitMask == 0) {
			// Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				// Next block
				buffer = currCache = NULL;
				if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
					/*
					 * If summary_block_scan is non-zero, then we must have
					 * pulled a bitmap file block into core, and scanned through
					 * the entire thing.  Because we're in this loop, we are
					 * implicitly trusting that the bitmap didn't have any knowledge
					 * about this particular block.  As a result, update the bitmap
					 * (lazily, now that we've scanned it) with our findings that
					 * this particular block is completely used up.
					 */
					if (summary_block_scan != 0) {
						uint32_t summary_bit;
						(void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
						hfs_set_summary (hfsmp, summary_bit, 1);
					}
				}

				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					block = NextBitmapBlock(vcb, block);
				}
				if (block >= endingBlock) {
					err = dskFulErr;
					goto Exit;
				}

				/*
				 * NOTE(review): after a metazone jump, buffer/bitMask are
				 * reset to the start of the newly read bitmap block, which
				 * is only correct if hfs_metazone_end + 1 falls on a bitmap
				 * block boundary -- confirm against metazone setup code.
				 */
				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
				if (err != noErr) goto Exit;
				buffer = currCache;
				summary_block_scan = block;
				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

	// Did we get to the end of the bitmap before finding a free block?
	// If so, then couldn't allocate anything.
	if (block >= endingBlock) {
		err = dskFulErr;
		goto Exit;
	}


	/*
	 * Don't move forward just yet.  Verify that either one of the following
	 * two conditions is true:
	 * 1) journaling is not enabled
	 * 2) block is not currently on any pending TRIM list.
	 */
	if (hfsmp->jnl != NULL && (forceFlush == false)) {
		int recently_deleted = 0;
		uint32_t nextblk;
		err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
		if ((err == 0) && (recently_deleted)) {

			/* release the bitmap block & unset currCache.  we may jump past it. */
			err = ReleaseBitmapBlock(vcb, blockRef, false);
			currCache = NULL;
			if (err != noErr) {
				goto Exit;
			}
			/* set our start to nextblk, and re-do the search. */
			startingBlock = nextblk;
			goto restartSearchAny;
		}
	}


	// Return the first block in the allocated range
	*actualStartBlock = block;

	// If we could get the desired number of blocks before hitting endingBlock,
	// then adjust endingBlock so we won't keep looking.  Ideally, the comparison
	// would be (block + maxBlocks) < endingBlock, but that could overflow.  The
	// comparison below yields identical results, but without overflow.
	if (block < (endingBlock-maxBlocks)) {
		endingBlock = block + maxBlocks;	// if we get this far, we've found enough
	}

	/*
	 * While loop 2:
	 * Scan the bitmap, starting at 'currentWord' in the current
	 * bitmap block.  Continue iterating through the bitmap until
	 * either we hit an allocated block, or until we have accumuluated
	 * maxBlocks worth of bitmap.
	 */

	/* Continue until we see an allocated block */
	while ((currentWord & bitMask) == 0) {
		// Move to the next block.  If no more, then exit.
		++block;
		if (block == endingBlock) {
			break;
		}

		// Next bit
		bitMask >>= 1;
		if (bitMask == 0) {
			// Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				// Next block
				buffer = currCache = NULL;

				/* We're only reading the bitmap here, so mark it as clean */
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) {
					goto Exit;
				}

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;
					nextBlock = NextBitmapBlock(vcb, block);
					if (nextBlock != block) {
						// err is still noErr here, so the run accumulated
						// so far is returned as a successful (short) result.
						goto Exit;	/* allocation gap, so stop */
					}
				}

				if (block >= endingBlock) {
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
				if (err != noErr) {
					goto Exit;
				}
				buffer = currCache;
				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

Exit:
	if (currCache) {
		/* Release the bitmap reference prior to marking bits in-use */
		(void) ReleaseBitmapBlock(vcb, blockRef, false);
		currCache = NULL;
	}

	if (err == noErr) {
		*actualNumBlocks = block - *actualStartBlock;

		// sanity check
		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
			panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
		}
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
2675
2676
2677 /*
2678 _______________________________________________________________________
2679
2680 Routine: BlockFindKnown
2681
2682 Function: Return a potential extent from the free extent cache. The
2683 returned extent *must* be marked allocated and removed
2684 from the cache by the *caller*.
2685
2686 Inputs:
2687 vcb Pointer to volume where space is to be allocated
2688 maxBlocks Maximum number of contiguous blocks to allocate
2689
2690 Outputs:
2691 actualStartBlock First block of range allocated, or 0 if error
2692 actualNumBlocks Number of blocks allocated, or 0 if error
2693
2694 Returns:
2695 dskFulErr Free extent cache is empty
2696 _______________________________________________________________________
2697 */
2698
2699 static OSErr BlockFindKnown(
2700 ExtendedVCB *vcb,
2701 u_int32_t maxBlocks,
2702 u_int32_t *actualStartBlock,
2703 u_int32_t *actualNumBlocks)
2704 {
2705 OSErr err;
2706 u_int32_t foundBlocks;
2707 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2708
2709 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2710 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
2711
2712 hfs_lock_mount (hfsmp);
2713 lck_spin_lock(&vcb->vcbFreeExtLock);
2714 if ( vcb->vcbFreeExtCnt == 0 ||
2715 vcb->vcbFreeExt[0].blockCount == 0) {
2716 lck_spin_unlock(&vcb->vcbFreeExtLock);
2717 hfs_unlock_mount(hfsmp);
2718 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2719 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
2720 return dskFulErr;
2721 }
2722 lck_spin_unlock(&vcb->vcbFreeExtLock);
2723 hfs_unlock_mount(hfsmp);
2724
2725 lck_spin_lock(&vcb->vcbFreeExtLock);
2726
2727 // Just grab up to maxBlocks of the first (largest) free exent.
2728 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2729 foundBlocks = vcb->vcbFreeExt[0].blockCount;
2730 if (foundBlocks > maxBlocks)
2731 foundBlocks = maxBlocks;
2732 *actualNumBlocks = foundBlocks;
2733
2734 lck_spin_unlock(&vcb->vcbFreeExtLock);
2735
2736 // sanity check
2737 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
2738 {
2739 printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2740 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2741 err = EIO;
2742 } else
2743 err = 0;
2744
2745 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2746 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2747
2748 return err;
2749 }
2750
/*
 * BlockMarkAllocated
 *
 * Public wrapper that marks a contiguous run of allocation blocks as
 * in-use in the on-disk bitmap, forwarding to BlockMarkAllocatedInternal
 * with no special allocation flags.
 *
 * NOTE(review): earlier comments here described marking blocks in a
 * red-black tree "if appropriate"; nothing in this wrapper does that any
 * longer -- confirm the tree-based allocator was removed before relying
 * on that behavior.
 */

OSErr BlockMarkAllocated(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	// flags == 0: plain allocation, no tentative/commit semantics.
	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
}
2767
2768
2769 /*
2770 _______________________________________________________________________
2771
2772 Routine: BlockMarkAllocatedInternal
2773
2774 Function: Mark a contiguous group of blocks as allocated (set in the
2775 bitmap). It assumes those bits are currently marked
2776 deallocated (clear in the bitmap). Note that this function
2777 must be called regardless of whether or not the bitmap or
2778 tree-based allocator is used, as all allocations must correctly
2779 be marked on-disk. If the tree-based approach is running, then
2780 this will be done before the node is removed from the tree.
2781
2782 Inputs:
2783 vcb Pointer to volume where space is to be allocated
2784 startingBlock First block number to mark as allocated
2785 numBlocks Number of blocks to mark as allocated
2786 _______________________________________________________________________
2787 */
static
OSErr BlockMarkAllocatedInternal (
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock,
		u_int32_t		numBlocks,
		hfs_block_alloc_flags_t flags)
{
	OSErr			err;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;	//	Number of words left in this bitmap block
	register u_int32_t	bitMask;	//	Word with given bits already set (ready to OR in)
	u_int32_t		firstBit;	//	Bit index within word of first bit to allocate
	u_int32_t		numBits;	//	Number of bits in word to allocate
	u_int32_t		*buffer = NULL;	//	Current bitmap block's data (NULL when no block is held)
	uintptr_t		blockRef = 0;	//	Opaque reference for releasing the bitmap block
	u_int32_t		bitsPerBlock;	//	Allocation-block bits covered by one bitmap I/O block
	u_int32_t		wordsPerBlock;	//	32-bit words per bitmap I/O block
	// XXXdbg
	struct hfsmount	*hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0);

#if DEBUG

	/*
	 * Debug sanity check: unless this is the commit of a previous
	 * reservation, the range being marked must not intersect any
	 * currently locked (reserved) block range.
	 */
	if (!ISSET(flags, HFS_ALLOC_COMMIT)
		|| ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
		struct rl_entry *range;
		TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
			hfs_assert(rl_overlap(range, startingBlock,
								  startingBlock + numBlocks - 1) == RL_NOOVERLAP);
		}
	}

#endif

	int force_flush = 0;
	/*
	 * Since we are about to mark these bits as in-use
	 * in the bitmap, decide if we need to alert the caller
	 * that a journal flush might be appropriate. It's safe to
	 * poke at the journal pointer here since we MUST have
	 * called start_transaction by the time this function is invoked.
	 * If the journal is enabled, then it will have taken the requisite
	 * journal locks. If it is not enabled, then we have taken
	 * a shared lock on the global lock.
	 */
	if (hfsmp->jnl) {
		uint32_t ignore;
		err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
		if ((err == 0) && (force_flush)) {
			journal_request_immediate_flush (hfsmp->jnl);
		}
	}

	// Cancel any pending TRIM/unmap for blocks we are about to reuse.
	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);

	/*
	 * Don't make changes to the disk if we're just reserving.  Note that
	 * we could do better in the tentative case because we could, in theory,
	 * avoid the journal flush above.  However, that would mean that we would
	 * need to catch the callback to stop it incorrectly adding the extent
	 * to our free cache.
	 */
	if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
		err = 0;
		goto Exit;
	}

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
						  HFS_ALLOC_IGNORE_RESERVED);
	if (err != noErr) goto Exit;
	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		// Word offset of startingBlock within this bitmap I/O block.
		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	If the first block to allocate doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
#if DEBUG
		// These bits are expected to be clear (free) before we set them.
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to allocate

		++currentWord;			//	move to next word
		--wordsLeft;			//	one less word left in this block
	}

	//
	//	Allocate whole words (32 blocks) at a time.
	//

	bitMask = kAllBitsSetInWord;					//	put this in a register for 68K
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
								  HFS_ALLOC_IGNORE_RESERVED);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG
		// Whole word must be free before we mark it allocated.
		if (*currentWord != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord = SWAP_BE32 (bitMask);
		numBlocks -= kBitsPerWord;

		++currentWord;			//	move to next word
		--wordsLeft;			//	one less word left in this block
	}

	//
	//	Allocate any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
								  HFS_ALLOC_IGNORE_RESERVED);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			currentWord = buffer;
		}
#if DEBUG
		// The trailing partial word's bits must also be free.
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);			//	set the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	// Release any bitmap block still held (also ends the journal modify).
	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
2993
2994
2995 /*
2996 * BlockMarkFree
2997 *
2998 * This is a wrapper function around the internal calls which will actually mark the blocks
2999 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
3000 * this logic here to avoid callers having to deal with whether or not the red-black tree
3001 * is enabled.
3002 *
3003 */
3004 OSErr BlockMarkFree(
3005 ExtendedVCB *vcb,
3006 u_int32_t startingBlock,
3007 register u_int32_t numBlocks)
3008 {
3009 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
3010 }
3011
3012
3013 /*
3014 * BlockMarkFreeUnused
3015 *
3016 * Scan the bitmap block beyond end of current file system for bits
3017 * that are marked as used. If any of the bits are marked as used,
3018 * this function marks them free.
3019 *
3020 * Note: This was specifically written to mark all bits beyond
3021 * end of current file system during hfs_extendfs(), which makes
3022 * sure that all the new blocks added to the file system are
3023 * marked as free. We expect that all the blocks beyond end of
3024 * current file system are always marked as free, but there might
 * be cases where they are marked as used. This function assumes that
3026 * the number of blocks marked as used incorrectly are relatively
3027 * small, otherwise this can overflow journal transaction size
3028 * on certain file system configurations (example, large unused
3029 * bitmap with relatively small journal).
3030 *
3031 * Input:
3032 * startingBlock: First block of the range to mark unused
3033 * numBlocks: Number of blocks in the range to mark unused
3034 *
3035 * Returns: zero on success, non-zero on error.
3036 */
3037 OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
3038 {
3039 int error = 0;
3040 struct hfsmount *hfsmp = VCBTOHFS(vcb);
3041 u_int32_t curNumBlocks;
3042 u_int32_t bitsPerBlock;
3043 u_int32_t lastBit;
3044
3045 /* Use the optimal bitmap I/O size instead of bitmap block size */
3046 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3047
3048 /*
3049 * First clear any non bitmap allocation block aligned bits
3050 *
3051 * Calculate the first bit in the bitmap block next to
3052 * the bitmap block containing the bit for startingBlock.
3053 * Using this value, we calculate the total number of
3054 * bits to be marked unused from startingBlock to the
3055 * end of bitmap block containing startingBlock.
3056 */
3057 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
3058 curNumBlocks = lastBit - startingBlock;
3059 if (curNumBlocks > numBlocks) {
3060 curNumBlocks = numBlocks;
3061 }
3062 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
3063 if (error) {
3064 return error;
3065 }
3066 startingBlock += curNumBlocks;
3067 numBlocks -= curNumBlocks;
3068
3069 /*
3070 * Check a full bitmap block for any 'used' bit. If any bit is used,
3071 * mark all the bits only in that bitmap block as free. This ensures
3072 * that we do not write unmodified bitmap blocks and do not
3073 * overwhelm the journal.
3074 *
3075 * The code starts by checking full bitmap block at a time, and
3076 * marks entire bitmap block as free only if any bit in that bitmap
3077 * block is marked as used. In the end, it handles the last bitmap
3078 * block which might be partially full by only checking till the
3079 * caller-specified last bit and if any bit is set, only mark that
3080 * range as free.
3081 */
3082 while (numBlocks) {
3083 if (numBlocks >= bitsPerBlock) {
3084 curNumBlocks = bitsPerBlock;
3085 } else {
3086 curNumBlocks = numBlocks;
3087 }
3088 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
3089 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
3090 if (error) {
3091 return error;
3092 }
3093 }
3094 startingBlock += curNumBlocks;
3095 numBlocks -= curNumBlocks;
3096 }
3097
3098 return error;
3099 }
3100
3101 /*
3102 _______________________________________________________________________
3103
3104 Routine: BlockMarkFreeInternal
3105
3106 Function: Mark a contiguous group of blocks as free (clear in the
3107 bitmap). It assumes those bits are currently marked
3108 allocated (set in the bitmap).
3109
3110 Inputs:
3111 vcb Pointer to volume where space is to be freed
3112 startingBlock First block number to mark as freed
3113 numBlocks Number of blocks to mark as freed
3114 do_validate If true, validate that the blocks being
3115 deallocated to check if they are within totalBlocks
3116 for current volume and whether they were allocated
3117 before they are marked free.
3118 _______________________________________________________________________
3119 */
static
OSErr BlockMarkFreeInternal(
		ExtendedVCB		*vcb,
		u_int32_t		startingBlock_in,
		register u_int32_t	numBlocks_in,
		Boolean 		do_validate)
{
	OSErr		err;
	u_int32_t	startingBlock = startingBlock_in;
	u_int32_t	numBlocks = numBlocks_in;
	// unmapStart/unmapCount track the free extent we will hand to
	// hfs_unmap_free_extent() on success; the scans below grow it to
	// cover adjacent already-free blocks.
	uint32_t	unmapStart = startingBlock_in;
	uint32_t	unmapCount = numBlocks_in;
	uint32_t	wordIndexInBlock;
	u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	u_int32_t	wordsLeft;	//	Number of words left in this bitmap block
	u_int32_t	bitMask;	//	Word with given bits already set (ready to OR in)
	u_int32_t	currentBit;	//	Bit index within word of current bit to allocate
	u_int32_t	numBits;	//	Number of bits in word to allocate
	u_int32_t	*buffer = NULL;	//	Current bitmap block's data (NULL when no block is held)
	uintptr_t	blockRef = 0;	//	Opaque reference for releasing the bitmap block
	u_int32_t	bitsPerBlock;	//	Allocation-block bits covered by one bitmap I/O block
	u_int32_t	wordsPerBlock;	//	32-bit words per bitmap I/O block
	// XXXdbg
	struct hfsmount	*hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);

	/*
	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
	 * need to be able to free blocks being relocated during hfs_truncatefs.
	 */
	if ((do_validate == true) &&
		(startingBlock + numBlocks > vcb->totalBlocks)) {
#if ALLOC_DEBUG || DEBUG
		panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
		__builtin_unreachable();
#else
		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
		hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
		err = EIO;
		goto Exit;
#endif
	}

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
						  HFS_ALLOC_IGNORE_RESERVED);
	if (err != noErr) goto Exit;
	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	uint32_t min_unmap = 0, max_unmap = UINT32_MAX;

	// Work out the bounds of any unmap we can send down: the unmap
	// extent must not be grown into any reserved block range.
	struct rl_entry *range;
	for (int i = 0; i < 2; ++i) {
		TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
			if (range->rl_start < startingBlock
				&& range->rl_end >= min_unmap) {
				min_unmap = range->rl_end + 1;
			}
			if (range->rl_end >= startingBlock + numBlocks
				&& range->rl_start < max_unmap) {
				max_unmap = range->rl_start;
			}
		}
	}

	//
	//	Figure out how many bits and words per bitmap block.
	//
	bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;

	//
	// Look for a range of free blocks immediately before startingBlock
	// (up to the start of the current bitmap block).  Set unmapStart to
	// the first free block.
	//
	currentWord = buffer + wordIndexInBlock;
	currentBit = startingBlock % kBitsPerWord;
	bitMask = kHighBitInWordMask >> currentBit;
	while (unmapStart > min_unmap) {
		// Move currentWord/bitMask back by one bit
		bitMask <<= 1;
		if (bitMask == 0) {
			// Stepped past the start of the current bitmap block; stop.
			if (--currentWord < buffer)
				break;
			bitMask = kLowBitInWordMask;
		}

		if (*currentWord & SWAP_BE32(bitMask))
			break;	// Found an allocated block.  Stop searching.
		--unmapStart;
		++unmapCount;
	}

	//
	//	If the first block to free doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	currentWord = buffer + wordIndexInBlock;
	wordsLeft = wordsPerBlock - wordIndexInBlock;
	currentBit = startingBlock % kBitsPerWord;
	if (currentBit != 0) {
		bitMask = kAllBitsSetInWord >> currentBit;	//	turn off all bits before currentBit
		numBits = kBitsPerWord - currentBit;		//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits));	//	turn off bits after last
		}
		// When validating, the bits being cleared must currently be set.
		if ((do_validate == true) &&
			(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to free

		++currentWord;			//	move to next word
		--wordsLeft;			//	one less word left in this block
	}

	//
	//	Free whole words (32 blocks) at a time.
	//

	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
								  HFS_ALLOC_IGNORE_RESERVED);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		// When validating, a whole word being freed must be fully allocated.
		if ((do_validate == true) &&
			(*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
			goto Corruption;
		}
		*currentWord = 0;							//	clear the entire word
		numBlocks -= kBitsPerWord;

		++currentWord;			//	move to next word
		--wordsLeft;			//	one less word left in this block
	}

	//
	//	Free any remaining blocks.
	//

	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block

			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
								  HFS_ALLOC_IGNORE_RESERVED);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			currentWord = buffer;
		}
		// When validating, the trailing partial word's bits must be set.
		if ((do_validate == true) &&
			(*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);			//	clear the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

	//
	// Look for a range of free blocks immediately after the range we just freed
	// (up to the end of the current bitmap block).
	//
	wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
	wordsLeft = wordsPerBlock - wordIndexInBlock;
	currentWord = buffer + wordIndexInBlock;
	currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
	bitMask = kHighBitInWordMask >> currentBit;
	while (unmapStart + unmapCount < max_unmap) {
		// Move currentWord/bitMask/wordsLeft forward one bit
		bitMask >>= 1;
		if (bitMask == 0) {
			// Stepped past the end of the current bitmap block; stop.
			if (--wordsLeft == 0)
				break;
			++currentWord;
			bitMask = kHighBitInWordMask;
		}

		if (*currentWord & SWAP_BE32(bitMask))
			break;	// Found an allocated block.  Stop searching.
		++unmapCount;
	}

Exit:

	// Release any bitmap block still held (also ends the journal modify).
	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	// Only issue the TRIM/unmap for the (possibly extended) free extent
	// if the bitmap update itself succeeded.
	if (err == noErr) {
		hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;

Corruption:
#if DEBUG
	panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
	__builtin_unreachable();
#else
	printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
			vcb->vcbVN, startingBlock_in, numBlocks_in);
	hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
	err = EIO;
	goto Exit;
#endif
}
3372
3373
3374 /*
3375 _______________________________________________________________________
3376
3377 Routine: BlockFindContiguous
3378
3379 Function: Find a contiguous range of blocks that are free (bits
3380 clear in the bitmap). If a contiguous range of the
3381 minimum size can't be found, an error will be returned.
3382 This is only needed to support the bitmap-scanning logic,
3383 as the red-black tree should be able to do this by internally
3384 searching its tree.
3385
3386 Inputs:
3387 vcb Pointer to volume where space is to be allocated
3388 startingBlock Preferred first block of range
3389 endingBlock Last possible block in range + 1
3390 minBlocks Minimum number of blocks needed. Must be > 0.
3391 maxBlocks Maximum (ideal) number of blocks desired
3392 useMetaZone OK to dip into metadata allocation zone
3393
3394 Outputs:
3395 actualStartBlock First block of range found, or 0 if error
3396 actualNumBlocks Number of blocks found, or 0 if error
3397
3398 Returns:
3399 noErr Found at least minBlocks contiguous
3400 dskFulErr No contiguous space found, or all less than minBlocks
3401 _______________________________________________________________________
3402 */
3403
3404 static OSErr BlockFindContiguous(
3405 ExtendedVCB *vcb,
3406 u_int32_t startingBlock,
3407 u_int32_t endingBlock,
3408 u_int32_t minBlocks,
3409 u_int32_t maxBlocks,
3410 Boolean useMetaZone,
3411 Boolean trustSummary,
3412 u_int32_t *actualStartBlock,
3413 u_int32_t *actualNumBlocks,
3414 hfs_block_alloc_flags_t flags)
3415 {
3416 OSErr err;
3417 register u_int32_t currentBlock; // Block we're currently looking at.
3418 u_int32_t firstBlock; // First free block in current extent.
3419 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
3420 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
3421 u_int32_t *buffer = NULL;
3422 register u_int32_t *currentWord;
3423 register u_int32_t bitMask;
3424 register u_int32_t wordsLeft;
3425 register u_int32_t tempWord;
3426 uintptr_t blockRef = 0;
3427 u_int32_t wordsPerBlock;
3428 u_int32_t updated_free_extent = 0;
3429 struct hfsmount *hfsmp = (struct hfsmount*) vcb;
3430 HFSPlusExtentDescriptor best = { 0, 0 };
3431
3432 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3433 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
3434
3435 /*
3436 * When we're skipping the metadata zone and the start/end
3437 * range overlaps with the metadata zone then adjust the
3438 * start to be outside of the metadata zone. If the range
3439 * is entirely inside the metadata zone then we can deny the
3440 * request (dskFulErr).
3441 */
3442 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
3443 if (startingBlock <= vcb->hfs_metazone_end) {
3444 if (endingBlock > (vcb->hfs_metazone_end + 2))
3445 startingBlock = vcb->hfs_metazone_end + 1;
3446 else
3447 goto DiskFull;
3448 }
3449 }
3450
3451 if ((endingBlock - startingBlock) < minBlocks)
3452 {
3453 // The set of blocks we're checking is smaller than the minimum number
3454 // of blocks, so we couldn't possibly find a good range.
3455 goto DiskFull;
3456 }
3457
3458 stopBlock = endingBlock - minBlocks + 1;
3459 currentBlock = startingBlock;
3460 firstBlock = 0;
3461
3462 /*
3463 * Skip over metadata blocks.
3464 */
3465 if (!useMetaZone)
3466 currentBlock = NextBitmapBlock(vcb, currentBlock);
3467
3468 /*
3469 * Use the summary table if we can. Skip over any totally
3470 * allocated blocks. currentBlock should now point to the first
3471 * block beyond the metadata zone if the metazone allocations are not
3472 * allowed in this invocation.
3473 */
3474 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3475 uint32_t suggestion;
3476 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
3477 if (err && err != ENOSPC)
3478 goto ErrorExit;
3479 if (err == ENOSPC || suggestion >= stopBlock)
3480 goto DiskFull;
3481 currentBlock = suggestion;
3482 }
3483
3484
3485 //
3486 // Pre-read the first bitmap block.
3487 //
3488 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3489 if ( err != noErr ) goto ErrorExit;
3490
3491 //
3492 // Figure out where currentBlock is within the buffer.
3493 //
3494 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
3495
3496 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
3497 currentWord = buffer + wordsLeft;
3498 wordsLeft = wordsPerBlock - wordsLeft;
3499
3500 uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
3501 - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
3502 ? 0 : hfsmp->tentativeBlocks));
3503
3504 /*
3505 * This outer do-while loop is the main body of this function. Its job is
3506 * to search through the blocks (until we hit 'stopBlock'), and iterate
3507 * through swaths of allocated bitmap until it finds free regions.
3508 */
3509
3510 do
3511 {
3512 foundBlocks = 0;
3513 /*
3514 * We will try and update the summary table as we search
3515 * below. Note that we will never update the summary table
3516 * for the first and last blocks that the summary table
3517 * covers. Ideally, we should, but the benefits probably
3518 * aren't that significant so we leave things alone for now.
3519 */
3520 uint32_t summary_block_scan = 0;
3521 /*
3522 * Inner while loop 1:
3523 * Look for free blocks, skipping over allocated ones.
3524 *
3525 * Initialization starts with checking the initial partial word
3526 * if applicable.
3527 */
3528 bitMask = currentBlock & kBitsWithinWordMask;
3529 if (bitMask)
3530 {
3531 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3532 bitMask = kHighBitInWordMask >> bitMask;
3533 while (tempWord & bitMask)
3534 {
3535 bitMask >>= 1;
3536 ++currentBlock;
3537 }
3538
3539 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
3540 if (bitMask)
3541 goto FoundUnused;
3542
3543 // Didn't find any unused bits, so we're done with this word.
3544 ++currentWord;
3545 --wordsLeft;
3546 }
3547
3548 //
3549 // Check whole words
3550 //
3551 while (currentBlock < stopBlock)
3552 {
3553 // See if it's time to read another block.
3554 if (wordsLeft == 0)
3555 {
3556 buffer = NULL;
3557 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3558 /*
3559 * If summary_block_scan is non-zero, then we must have
3560 * pulled a bitmap file block into core, and scanned through
3561 * the entire thing. Because we're in this loop, we are
3562 * implicitly trusting that the bitmap didn't have any knowledge
3563 * about this particular block. As a result, update the bitmap
3564 * (lazily, now that we've scanned it) with our findings that
3565 * this particular block is completely used up.
3566 */
3567 if (summary_block_scan != 0) {
3568 uint32_t summary_bit;
3569 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
3570 hfs_set_summary (hfsmp, summary_bit, 1);
3571 }
3572 }
3573 err = ReleaseBitmapBlock(vcb, blockRef, false);
3574 if (err != noErr) goto ErrorExit;
3575
3576 /*
3577 * Skip over metadata blocks.
3578 */
3579 if (!useMetaZone) {
3580 currentBlock = NextBitmapBlock(vcb, currentBlock);
3581 if (currentBlock >= stopBlock) {
3582 goto LoopExit;
3583 }
3584 }
3585
3586 /* Skip over fully allocated bitmap blocks if we can */
3587 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3588 uint32_t suggestion;
3589 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
3590 if (err && err != ENOSPC)
3591 goto ErrorExit;
3592 if (err == ENOSPC || suggestion >= stopBlock)
3593 goto LoopExit;
3594 currentBlock = suggestion;
3595 }
3596
3597 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3598 if ( err != noErr ) goto ErrorExit;
3599
3600 /*
3601 * Set summary_block_scan to be the block we just read into the block cache.
3602 *
3603 * At this point, we've just read an allocation block worth of bitmap file
3604 * into the buffer above, but we don't know if it is completely allocated or not.
3605 * If we find that it is completely allocated/full then we will jump
3606 * through this loop again and set the appropriate summary bit as fully allocated.
3607 */
3608 summary_block_scan = currentBlock;
3609 currentWord = buffer;
3610 wordsLeft = wordsPerBlock;
3611 }
3612
3613 // See if any of the bits are clear
3614 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
3615 {
3616 // Figure out which bit is clear
3617 bitMask = kHighBitInWordMask;
3618 while (tempWord & bitMask)
3619 {
3620 bitMask >>= 1;
3621 ++currentBlock;
3622 }
3623
3624 break; // Found the free bit; break out to FoundUnused.
3625 }
3626
3627 // Keep looking at the next word
3628 currentBlock += kBitsPerWord;
3629 ++currentWord;
3630 --wordsLeft;
3631 }
3632
3633 FoundUnused:
3634 // Make sure the unused bit is early enough to use
3635 if (currentBlock >= stopBlock)
3636 {
3637 break;
3638 }
3639
3640 // Remember the start of the extent
3641 firstBlock = currentBlock;
3642
3643
3644 /*
3645 * Inner while loop 2:
3646 * We get here if we find a free block. Count the number
3647 * of contiguous free blocks observed.
3648 *
3649 * Initialization starts with checking the initial partial word
3650 * if applicable.
3651 */
3652 bitMask = currentBlock & kBitsWithinWordMask;
3653 if (bitMask)
3654 {
3655 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3656 bitMask = kHighBitInWordMask >> bitMask;
3657 while (bitMask && !(tempWord & bitMask))
3658 {
3659 bitMask >>= 1;
3660 ++currentBlock;
3661 }
3662
3663 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3664 if (bitMask)
3665 goto FoundUsed;
3666
3667 // Didn't find any used bits, so we're done with this word.
3668 ++currentWord;
3669 --wordsLeft;
3670 }
3671
3672 //
3673 // Check whole words
3674 //
3675 while (currentBlock < endingBlock)
3676 {
3677 // See if it's time to read another block.
3678 if (wordsLeft == 0)
3679 {
3680 buffer = NULL;
3681 err = ReleaseBitmapBlock(vcb, blockRef, false);
3682 if (err != noErr) goto ErrorExit;
3683
3684 /*
3685 * Skip over metadata blocks.
3686 */
3687 if (!useMetaZone) {
3688 u_int32_t nextBlock;
3689
3690 nextBlock = NextBitmapBlock(vcb, currentBlock);
3691 if (nextBlock != currentBlock) {
3692 goto LoopExit; /* allocation gap, so stop */
3693 }
3694 }
3695
3696 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3697 if ( err != noErr ) goto ErrorExit;
3698
3699 currentWord = buffer;
3700 wordsLeft = wordsPerBlock;
3701 }
3702
3703 // See if any of the bits are set
3704 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
3705 {
3706 // Figure out which bit is set
3707 bitMask = kHighBitInWordMask;
3708 while (!(tempWord & bitMask))
3709 {
3710 bitMask >>= 1;
3711 ++currentBlock;
3712 }
3713
3714 break; // Found the used bit; break out to FoundUsed.
3715 }
3716
3717 // Keep looking at the next word
3718 currentBlock += kBitsPerWord;
3719 ++currentWord;
3720 --wordsLeft;
3721
3722 // If we found at least maxBlocks, we can quit early.
3723 if ((currentBlock - firstBlock) >= maxBlocks)
3724 break;
3725 }
3726
3727 FoundUsed:
3728 // Make sure we didn't run out of bitmap looking for a used block.
3729 // If so, pin to the end of the bitmap.
3730 if (currentBlock > endingBlock)
3731 currentBlock = endingBlock;
3732
3733 // Figure out how many contiguous free blocks there were.
3734 // Pin the answer to maxBlocks.
3735 foundBlocks = currentBlock - firstBlock;
3736 if (foundBlocks > maxBlocks)
3737 foundBlocks = maxBlocks;
3738
3739 if (remaining) {
3740 if (foundBlocks > remaining) {
3741 hfs_debug("hfs: found more blocks than are indicated free!\n");
3742 remaining = UINT32_MAX;
3743 } else
3744 remaining -= foundBlocks;
3745 }
3746
3747 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3748 if (foundBlocks > best.blockCount) {
3749 best.startBlock = firstBlock;
3750 best.blockCount = foundBlocks;
3751 }
3752
3753 if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
3754 break;
3755
3756 /*
3757 * Note that we will go ahead and add this free extent to our
3758 * cache below but that's OK because we'll remove it again if we
3759 * decide to use this extent.
3760 */
3761 } else if (foundBlocks >= minBlocks)
3762 break; // Found what we needed!
3763
3764 /*
3765 * We did not find the total blocks we were looking for, but
3766 * add this free block run to our free extent cache list, if possible.
3767 */
3768
3769 // If we're ignoring tentative ranges, we need to account for them here
3770 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
3771 struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
3772 struct rl_entry *range;;
3773 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
3774 rl_subtract(&free_extent, range);
3775 if (rl_len(range) == 0)
3776 break;
3777 }
3778 firstBlock = free_extent.rl_start;
3779 foundBlocks = rl_len(&free_extent);
3780 }
3781
3782 if (foundBlocks) {
3783 if (hfsmp->jnl == NULL) {
3784 /* If there is no journal, go ahead and add to the free ext cache. */
3785 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3786 }
3787 else {
3788 /*
3789 * If journaled, only add to the free extent cache if this block is not
3790 * waiting for a TRIM to complete; that implies that the transaction that freed it
3791 * has not yet been committed to stable storage.
3792 */
3793 int recently_deleted = 0;
3794 uint32_t nextblock;
3795 err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
3796 (uint64_t)foundBlocks, &recently_deleted, &nextblock);
3797 if ((err) || (recently_deleted == 0)) {
3798 /* if we hit an error, or the blocks not recently freed, go ahead and insert it */
3799 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3800 }
3801 }
3802 }
3803 } while (currentBlock < stopBlock);
3804 LoopExit:
3805
3806 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3807 firstBlock = best.startBlock;
3808 foundBlocks = best.blockCount;
3809 }
3810
3811 // Return the outputs.
3812 if (foundBlocks < minBlocks)
3813 {
3814 DiskFull:
3815 err = dskFulErr;
3816 ErrorExit:
3817 *actualStartBlock = 0;
3818 *actualNumBlocks = 0;
3819 }
3820 else
3821 {
3822 err = noErr;
3823 *actualStartBlock = firstBlock;
3824 *actualNumBlocks = foundBlocks;
3825 /*
3826 * Sanity check for overflow
3827 */
3828 if ((firstBlock + foundBlocks) > vcb->allocLimit) {
3829 panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
3830 vcb->vcbVN, startingBlock, endingBlock, currentBlock,
3831 firstBlock, stopBlock, minBlocks, foundBlocks);
3832 }
3833 }
3834
3835 if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
3836 int i;
3837 u_int32_t min_start = vcb->totalBlocks;
3838
3839 // set the nextAllocation pointer to the smallest free block number
3840 // we've seen so on the next mount we won't rescan unnecessarily
3841 lck_spin_lock(&vcb->vcbFreeExtLock);
3842 for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
3843 if (vcb->vcbFreeExt[i].startBlock < min_start) {
3844 min_start = vcb->vcbFreeExt[i].startBlock;
3845 }
3846 }
3847 lck_spin_unlock(&vcb->vcbFreeExtLock);
3848 if (min_start != vcb->totalBlocks) {
3849 if (min_start < vcb->nextAllocation) {
3850 vcb->nextAllocation = min_start;
3851 }
3852 if (min_start < vcb->sparseAllocation) {
3853 vcb->sparseAllocation = min_start;
3854 }
3855 }
3856 }
3857
3858 if (buffer)
3859 (void) ReleaseBitmapBlock(vcb, blockRef, false);
3860
3861 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3862 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
3863
3864 return err;
3865 }
3866
3867
3868 /*
3869 * Count number of bits set in the given 32-bit unsigned number
3870 *
3871 * Returns:
3872 * Number of bits set
3873 */
/*
 * Count number of bits set in the given 32-bit unsigned number.
 *
 * Uses Kernighan's trick: num &= num - 1 clears the lowest set bit,
 * so the loop runs once per set bit rather than once per bit position.
 *
 * Returns:
 *	Number of bits set
 */
static int num_bits_set(u_int32_t num)
{
	int total = 0;

	while (num != 0) {
		num &= num - 1;		/* clear the lowest set bit */
		total++;
	}

	return total;
}
3884
3885 /*
3886 * For a given range of blocks, find the total number of blocks
3887 * allocated. If 'stop_on_first' is true, it stops as soon as it
3888 * encounters the first allocated block. This option is useful
3889 * to determine if any block is allocated or not.
3890 *
3891 * Inputs:
3892 * startingBlock First allocation block number of the range to be scanned.
3893 * numBlocks Total number of blocks that need to be scanned.
3894 * stop_on_first Stop the search after the first allocated block is found.
3895 *
3896 * Output:
3897 * allocCount Total number of allocation blocks allocated in the given range.
3898 *
3899 * On error, it is the number of allocated blocks found
3900 * before the function got an error.
3901 *
3902 * If 'stop_on_first' is set,
3903 * allocCount = 1 if any allocated block was found.
3904 * allocCount = 0 if no allocated block was found.
3905 *
3906 * Returns:
3907 * 0 on success, non-zero on failure.
3908 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t  *currentWord;	//	Pointer to current word within bitmap block
	u_int32_t  wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t  bitMask;		//	Word with given bits already set (ready to test)
	u_int32_t  firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t  numBits;		//	Number of bits in word to allocate
	u_int32_t  *buffer = NULL;
	uintptr_t  blockRef;		//	Opaque handle handed back to ReleaseBitmapBlock
	u_int32_t  bitsPerBlock;	//	Number of bitmap bits covered by one vcbVBMIOSize read
	u_int32_t  wordsPerBlock;
	u_int32_t  blockCount = 0;	//	Running tally of allocated blocks found so far
	int  error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
			HFS_ALLOC_IGNORE_TENTATIVE);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		// Word index of startingBlock within this vcbVBMIOSize-sized chunk.
		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		// Mask covering bits firstBit .. end of word ...
		bitMask = kAllBitsSetInWord >> firstBit;
		numBits = kBitsPerWord - firstBit;
		if (numBits > numBlocks) {
			numBits = numBlocks;
			// ... trimmed so that only numBlocks bits remain set.
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
		}
		// The bitmap is big-endian on disk, so the mask is byte-swapped to match.
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			// startingBlock now only names the next IO chunk; its low bits
			// are no longer significant because we are word-aligned here.
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
					HFS_ALLOC_IGNORE_TENTATIVE);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			// Popcount is endianness-independent, so no SWAP_BE32 is needed
			// when counting an entire word.
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		// Mask of the high-order numBlocks bits of the final word.
		bitMask = ~(kAllBitsSetInWord >> numBlocks);
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
					HFS_ALLOC_IGNORE_TENTATIVE);
			if (error) goto Exit;

			// wordsLeft is deliberately not reset: this is the last word examined.
			currentWord = buffer;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	// allocCount is optional; callers that only need the error may pass NULL.
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}
4046
4047 /*
4048 * Count total number of blocks that are allocated in the given
4049 * range from the bitmap. This is used to preflight total blocks
4050 * that need to be relocated during volume resize.
4051 *
4052 * The journal or allocation file lock must be held.
4053 *
4054 * Returns:
4055 * 0 on success, non-zero on failure.
4056 * On failure, allocCount is zero.
4057 */
4058 int
4059 hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
4060 u_int32_t numBlocks, u_int32_t *allocCount)
4061 {
4062 return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
4063 }
4064
4065 /*
4066 * Test to see if any blocks in a range are allocated.
4067 *
4068 * Note: On error, this function returns 1, which means that
4069 * one or more blocks in the range are allocated. This function
4070 * is primarily used for volume resize and we do not want
4071 * to report to the caller that the blocks are free when we
4072 * were not able to deterministically find it out. So on error,
4073 * we always report that the blocks are allocated.
4074 *
4075 * The journal or allocation file lock must be held.
4076 *
4077 * Returns
4078 * 0 if all blocks in the range are free.
4079 * 1 if blocks in the range are allocated, or there was an error.
4080 */
4081 int
4082 hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
4083 {
4084 int error;
4085 u_int32_t allocCount;
4086
4087 error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
4088 if (error) {
4089 /* On error, we always say that the blocks are allocated
4090 * so that volume resize does not return false success.
4091 */
4092 return 1;
4093 } else {
4094 /* The function was deterministically able to find out
4095 * if there was any block allocated or not. In that case,
4096 * the value in allocCount is good enough to be returned
4097 * back to the caller.
4098 */
4099 return allocCount;
4100 }
4101 }
4102
4103 /*
4104 * CONFIG_HFS_RBTREE
4105 * Check to see if the red-black tree is live. Allocation file lock must be held
4106 * shared or exclusive to call this function. Note that we may call this even if
4107 * HFS is built without activating the red-black tree code.
4108 */
int
hfs_isrbtree_active(struct hfsmount *hfsmp){

	(void)hfsmp;	/* unused: the red-black tree code is not compiled in */

	/* The allocator's red-black tree is never live in this build. */
	return 0;
}
4117
4118
4119
4120 /* Summary Table Functions */
4121 /*
4122 * hfs_check_summary:
4123 *
4124 * This function should be used to query the summary table to see if we can
4125 * bypass a bitmap block or not when we're trying to find a free allocation block.
4126 *
4127 *
4128 * Inputs:
4129 * allocblock - allocation block number. Will be used to infer the correct summary bit.
4130 * hfsmp -- filesystem in question.
4131 *
4132 * Output Arg:
4133 * *freeblocks - set to 1 if we believe there is at least one free block in this vcbVBMIOSize
4134 * page of bitmap file.
4135 *
4136 *
4137 * Returns:
4138 * 0 on success
4139 * EINVAL on error
4140 *
4141 */
4142
4143 static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
4144
4145 int err = EINVAL;
4146 if (hfsmp->vcbVBMIOSize) {
4147 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4148 uint32_t index;
4149 if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
4150 *freeblocks = 0;
4151 return EINVAL;
4152 }
4153
4154 /* Ok, now that we have the bit index into the array, what byte is it in ? */
4155 uint32_t byteindex = index / kBitsPerByte;
4156 uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
4157 uint8_t bit_in_byte = index % kBitsPerByte;
4158
4159 if (current_byte & (1 << bit_in_byte)) {
4160 /*
4161 * We do not believe there is anything free in the
4162 * entire vcbVBMIOSize'd block.
4163 */
4164 *freeblocks = 0;
4165 }
4166 else {
4167 /* Looks like there might be a free block here... */
4168 *freeblocks = 1;
4169 }
4170 }
4171 err = 0;
4172 }
4173
4174 return err;
4175 }
4176
4177
4178 #if 0
4179 /*
4180 * hfs_get_next_summary
4181 *
4182 * From a given allocation block, jump to the allocation block at the start of the
4183 * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over
4184 * large swaths of bitmap once we have determined that the bitmap is relatively full.
4185 *
4186 * Inputs: hfsmount, starting allocation block number
4187 * Output Arg: *newblock will contain the allocation block number to start
4188 * querying.
4189 *
4190 * Returns:
4191 * 0 on success
4192 * EINVAL if the block argument is too large to be used, or the summary table not live.
4193 * EFBIG if there are no more summary bits to be queried
4194 */
4195 static int
4196 hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
4197
4198 u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4199 u_int32_t start_offset;
4200 u_int32_t next_offset;
4201 int err = EINVAL;
4202
4203 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4204 if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
4205 return err;
4206 }
4207
4208 next_offset = start_offset++;
4209
4210 if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
4211 /* Can't jump to the next summary bit. */
4212 return EINVAL;
4213 }
4214
4215 /* Otherwise, compute and return */
4216 *newblock = next_offset * bits_per_iosize;
4217 if (*newblock >= hfsmp->totalBlocks) {
4218 return EINVAL;
4219 }
4220 err = 0;
4221 }
4222
4223 return err;
4224 }
4225
4226 #endif
4227
4228 /*
4229 * hfs_release_summary
4230 *
4231 * Given an extent that is about to be de-allocated on-disk, determine the number
4232 * of summary bitmap bits that need to be marked as 'potentially available'.
4233 * Then go ahead and mark them as free.
4234 *
4235 * Inputs:
4236 * hfsmp - hfs mount
4237 * block - starting allocation block.
4238 * length - length of the extent.
4239 *
4240 * Returns:
4241 * EINVAL upon any errors.
4242 */
4243 static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
4244 int err = EINVAL;
4245 uint32_t end_blk = (start_blk + length) - 1;
4246
4247 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4248 /* Figure out what the starting / ending block's summary bits are */
4249 uint32_t start_bit;
4250 uint32_t end_bit;
4251 uint32_t current_bit;
4252
4253 err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
4254 if (err) {
4255 goto release_err;
4256 }
4257 err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
4258 if (err) {
4259 goto release_err;
4260 }
4261
4262 if (ALLOC_DEBUG) {
4263 if (start_bit > end_bit) {
4264 panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
4265 }
4266 }
4267 current_bit = start_bit;
4268 while (current_bit <= end_bit) {
4269 err = hfs_set_summary (hfsmp, current_bit, 0);
4270 current_bit++;
4271 }
4272 }
4273
4274 release_err:
4275 return err;
4276 }
4277
4278 /*
4279 * hfs_find_summary_free
4280 *
4281 * Given a allocation block as input, returns an allocation block number as output as a
4282 * suggestion for where to start scanning the bitmap in order to find free blocks. It will
4283 * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
4284 * bit, then keep iterating over the summary bits in order to find the first free one.
4285 *
4286 * Inputs:
4287 * hfsmp - hfs mount
4288 * block - starting allocation block
4289 * newblock - output block as suggestion
4290 *
4291 * Returns:
4292 * 0 on success
4293 * ENOSPC if we could not find a free block
4294 */
4295
int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {

	int err = ENOSPC;
	uint32_t bit_index = 0;
	uint32_t maybe_has_blocks = 0;	// set once a promising summary bit is found

	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
		uint32_t byte_index;
		uint8_t curbyte;
		uint8_t bit_in_byte;
		uint32_t summary_cap;

		/*
		 * We generate a cap for the summary search because the summary table
		 * always represents a full summary of the bitmap FILE, which may
		 * be way more bits than are necessary for the actual filesystem
		 * whose allocations are mapped by the bitmap.
		 *
		 * Compute how much of hfs_summary_size is useable for the given number
		 * of allocation blocks eligible on this FS.
		 */
		err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
		if (err) {
			goto summary_exit;
		}

		/* Check the starting block first */
		err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
		if (err) {
			goto summary_exit;
		}

		if (maybe_has_blocks) {
			/*
			 * It looks like the initial start block could have something.
			 * Short-circuit and just use that.
			 */
			*newblock = block;
			goto summary_exit;
		}

		/*
		 * OK, now we know that the first block was useless.
		 * Get the starting summary bit, and find it in the array
		 */
		maybe_has_blocks = 0;
		err = hfs_get_summary_index (hfsmp, block, &bit_index);
		if (err) {
			goto summary_exit;
		}

		/* Iterate until we find something. */
		while (bit_index <= summary_cap) {
			byte_index = bit_index / kBitsPerByte;
			curbyte = hfsmp->hfs_summary_table[byte_index];
			bit_in_byte = bit_index % kBitsPerByte;

			// A set bit means this vcbVBMIOSize chunk is believed full.
			if (curbyte & (1 << bit_in_byte)) {
				/* nothing here. increment and move on */
				bit_index++;
			}
			else {
				/*
				 * found something! convert bit_index back into
				 * an allocation block for use. 'newblock' will now
				 * contain the proper allocation block # based on the bit
				 * index.
				 */
				err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
				if (err) {
					goto summary_exit;
				}
				maybe_has_blocks = 1;
				break;
			}
		}

		/* If our loop didn't find anything, set err to ENOSPC */
		if (maybe_has_blocks == 0) {
			err = ENOSPC;
		}
	}

	/* If the summary table is not active for this mount, we'll just return ENOSPC */
summary_exit:
	// Any successful find (including the short-circuit path) forces err back
	// to 0, even if an intermediate helper had set it.
	if (maybe_has_blocks) {
		err = 0;
	}

	return err;
}
4387
4388 /*
4389 * hfs_get_summary_allocblock
4390 *
4391 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
4392 *
4393 * Inputs:
4394 * hfsmp - hfs mount
4395 * summarybit - summary bit index
4396 * *alloc - allocation block number in the bitmap file.
4397 *
4398 * Output:
4399 * 0 on success
4400 * EINVAL on failure
4401 */
4402 int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
4403 summarybit, uint32_t *alloc) {
4404 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4405 uint32_t allocblk;
4406
4407 allocblk = summarybit * bits_per_iosize;
4408
4409 if (allocblk >= hfsmp->totalBlocks) {
4410 return EINVAL;
4411 }
4412 else {
4413 *alloc = allocblk;
4414 }
4415
4416 return 0;
4417 }
4418
4419
4420 /*
4421 * hfs_set_summary:
4422 *
4423 * This function should be used to manipulate the summary table
4424 *
4425 * The argument 'inuse' will set the value of the bit in question to one or zero
4426 * depending on its value.
4427 *
4428 * Inputs:
4429 * hfsmp - hfs mount
4430 * summarybit - the bit index into the summary table to set/unset.
4431 * inuse - the value to assign to the bit.
4432 *
4433 * Returns:
4434 * 0 on success
4435 * EINVAL on error
4436 *
4437 */
4438
4439 static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
4440
4441 int err = EINVAL;
4442 if (hfsmp->vcbVBMIOSize) {
4443 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4444
4445 if (ALLOC_DEBUG) {
4446 if (hfsmp->hfs_summary_table == NULL) {
4447 panic ("hfs_set_summary: no table for %p ", hfsmp);
4448 }
4449 }
4450
4451 /* Ok, now that we have the bit index into the array, what byte is it in ? */
4452 uint32_t byte_index = summarybit / kBitsPerByte;
4453 uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
4454 uint8_t bit_in_byte = summarybit % kBitsPerByte;
4455
4456 if (inuse) {
4457 current_byte = (current_byte | (1 << bit_in_byte));
4458 }
4459 else {
4460 current_byte = (current_byte & ~(1 << bit_in_byte));
4461 }
4462
4463 hfsmp->hfs_summary_table[byte_index] = current_byte;
4464 }
4465 err = 0;
4466 }
4467
4468 return err;
4469 }
4470
4471
4472 /*
4473 * hfs_get_summary_index:
4474 *
4475 * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
4476 * of IO against the bitmap file.
4477 *
4478 * Returns:
4479 * 0 on success
4480 * EINVAL on failure
4481 */
4482 static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
4483 uint32_t summary_bit;
4484 uint32_t bits_per_iosize;
4485 int err = EINVAL;
4486
4487 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4488 /* Is the input block bigger than the total number of blocks? */
4489 if (block >= hfsmp->totalBlocks) {
4490 return EINVAL;
4491 }
4492
4493 /* Is there even a vbmIOSize set? */
4494 if (hfsmp->vcbVBMIOSize == 0) {
4495 return EINVAL;
4496 }
4497
4498 bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4499
4500 summary_bit = block / bits_per_iosize;
4501
4502 *index = summary_bit;
4503 err = 0;
4504 }
4505
4506 return err;
4507 }
4508
4509 /*
4510 * hfs_init_summary
4511 *
4512 * From a given mount structure, compute how big the summary table should be for the given
4513 * filesystem, then allocate and bzero the memory.
4514 *
4515 * Returns:
4516 * 0 on success
4517 * EINVAL on failure
4518 */
4519 int
4520 hfs_init_summary (struct hfsmount *hfsmp) {
4521
4522 uint32_t summary_size;
4523 uint32_t summary_size_bytes;
4524 uint8_t *summary_table;
4525
4526 if (hfsmp->hfs_allocation_cp == NULL) {
4527 if (ALLOC_DEBUG) {
4528 printf("hfs: summary table cannot progress without a bitmap cnode! \n");
4529 }
4530 return EINVAL;
4531 }
4532 /*
4533 * The practical maximum size of the summary table is 16KB:
4534 *
4535 * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
4536 *
4537 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
4538 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
4539 * supporting this much worse. The math would instead look like this:
4540 * (512MB / 2k) / 8 == 32k.
4541 *
4542 * So, we will disallow the summary table if the allocation block size is < 4k.
4543 */
4544
4545 if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
4546 printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
4547 return EINVAL;
4548 }
4549
4550 summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4551
4552 if (ALLOC_DEBUG) {
4553 printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
4554 hfsmp->hfs_allocation_cp->c_blocks);
4555 }
4556
4557 /*
4558 * If the bitmap IO size is not the same as the allocation block size then
4559 * then re-compute the number of summary bits necessary. Note that above, the
4560 * the default size is the number of allocation blocks in the bitmap *FILE*
4561 * (not the number of bits in the bitmap itself). If the allocation block size
4562 * is large enough though, we may need to increase this.
4563 */
4564 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4565 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4566 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4567
4568 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4569 summary_size = (uint32_t) lrg_size;
4570 }
4571
4572 /*
4573 * If the block size is the same as the IO Size, then the total number of blocks
4574 * is already equal to the number of IO units, which is our number of summary bits.
4575 */
4576
4577 summary_size_bytes = summary_size / kBitsPerByte;
4578 /* Always add one byte, just in case we have a dangling number of bits */
4579 summary_size_bytes++;
4580
4581 if (ALLOC_DEBUG) {
4582 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
4583 printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
4584 }
4585
4586 /* Store the field in the mount point */
4587 hfsmp->hfs_summary_size = summary_size;
4588 hfsmp->hfs_summary_bytes = summary_size_bytes;
4589
4590 summary_table = hfs_mallocz(summary_size_bytes);
4591
4592 /* enable the summary table */
4593 hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
4594 hfsmp->hfs_summary_table = summary_table;
4595
4596 if (ALLOC_DEBUG) {
4597 if (hfsmp->hfs_summary_table == NULL) {
4598 panic ("HFS Summary Init: no table for %p\n", hfsmp);
4599 }
4600 }
4601 return 0;
4602 }
4603
4604 /*
4605 * hfs_rebuild_summary
4606 *
4607 * This function should be used to allocate a new hunk of memory for use as a summary
4608 * table, then copy the existing data into it. We use it whenever the filesystem's size
4609 * changes. When a resize is in progress, you can still use the extant summary
4610 * table if it is active.
4611 *
4612 * Inputs:
4613 * hfsmp -- FS in question
4614 * newlength -- new length of the FS in allocation blocks.
4615 *
4616 * Outputs:
4617 * 0 on success, EINVAL on failure. If this function fails, the summary table
4618 * will be disabled for future use.
4619 *
4620 */
4621 static int hfs_rebuild_summary (struct hfsmount *hfsmp) {
4622
4623 uint32_t new_summary_size;
4624
4625 new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4626
4627
4628 if (ALLOC_DEBUG) {
4629 printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
4630 }
4631
4632 /*
4633 * If the bitmap IO size is not the same as the allocation block size, then re-compute
4634 * the number of summary bits necessary. Note that above, the default size is the number
4635 * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
4636 * If the allocation block size is large enough though, we may need to increase this, as
4637 * bitmap IO is capped at 64k per IO
4638 */
4639 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4640 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4641 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4642
4643 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4644 new_summary_size = (uint32_t) lrg_size;
4645 }
4646
4647 /*
4648 * Ok, we have the new summary bitmap theoretical max size. See if it's the same as
4649 * what we've got already...
4650 */
4651 if (new_summary_size != hfsmp->hfs_summary_size) {
4652 uint32_t summarybytes = new_summary_size / kBitsPerByte;
4653 uint32_t copysize;
4654 uint8_t *newtable;
4655 /* Add one byte for slop */
4656 summarybytes++;
4657
4658 if (ALLOC_DEBUG) {
4659 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
4660 printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
4661 }
4662
4663 newtable = hfs_mallocz(summarybytes);
4664
4665 /*
4666 * The new table may be smaller than the old one. If this is true, then
4667 * we can't copy the full size of the existing summary table into the new
4668 * one.
4669 *
4670 * The converse is not an issue since we bzeroed the table above.
4671 */
4672 copysize = hfsmp->hfs_summary_bytes;
4673 if (summarybytes < hfsmp->hfs_summary_bytes) {
4674 copysize = summarybytes;
4675 }
4676 memcpy (newtable, hfsmp->hfs_summary_table, copysize);
4677
4678 /* We're all good. Destroy the old copy and update ptrs */
4679 hfs_free(hfsmp->hfs_summary_table, hfsmp->hfs_summary_bytes);
4680
4681 hfsmp->hfs_summary_table = newtable;
4682 hfsmp->hfs_summary_size = new_summary_size;
4683 hfsmp->hfs_summary_bytes = summarybytes;
4684 }
4685
4686 return 0;
4687 }
4688
4689
4690 #if ALLOC_DEBUG
4691 /*
4692 * hfs_validate_summary
4693 *
4694 * Validation routine for the summary table. Debug-only function.
4695 *
4696 * Bitmap lock must be held.
4697 *
4698 */
void hfs_validate_summary (struct hfsmount *hfsmp) {
	uint32_t i;
	int err;

	/*
	 * Iterate over all of the bits in the summary table, and verify if
	 * there really are free blocks in the pages that we believe may
	 * contain free blocks.
	 */

	if (hfsmp->hfs_summary_table == NULL) {
		panic ("HFS Summary: No HFS summary table!");
	}

	/* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. we add 1 byte for slop */
	if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
		panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
	}

	if (hfsmp->vcbVBMIOSize == 0) {
		panic("HFS Summary: no VCB VBM IO Size !");
	}

	printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
	printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);


	/* iterate through all possible summary bits */
	for (i = 0; i < hfsmp->hfs_summary_size ; i++) {

		uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
		// Byte offset of this summary bit's chunk within the bitmap file.
		uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;

		/* Compute the corresponding allocation block for the summary bit. */
		uint32_t alloc_block = i * bits_per_iosize;

		/*
		 * We use a uint32_t pointer here because it will speed up
		 * access to the real bitmap data on disk.
		 */
		uint32_t *block_data;
		struct buf *bp;
		int counter;
		int counter_max;
		int saw_free_bits = 0;	// did this chunk actually contain a free bit?

		/* Get the block */
		if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) {
			panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
		}

		/* Query the status of the bit and then make sure we match */
		uint32_t maybe_has_free_blocks;
		err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
		if (err) {
			panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
		}
		counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;

		// Scan each 32-bit word of the on-disk chunk for a clear (free) bit.
		for (counter = 0; counter < counter_max; counter++) {
			uint32_t word = block_data[counter];

			/* We assume that we'll not find any free bits here. */
			if (word != kAllBitsSetInWord) {
				if (maybe_has_free_blocks) {
					/* All done */
					saw_free_bits = 1;
					break;
				}
				else {
					// Summary claimed "full" but the bitmap has free bits.
					panic ("HFS Summary: hfs_check_summary saw free bits!");
				}
			}
		}

		// Summary claimed "maybe free" but every word in the chunk was full.
		if (maybe_has_free_blocks && (saw_free_bits == 0)) {
			panic ("HFS Summary: did not see free bits !");
		}

		/* Release the block. */
		if ((err = ReleaseScanBitmapRange (bp))) {
			panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
		}
	}

	printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);

	return;
}
4788 #endif
4789
4790 /*
4791 * hfs_alloc_scan_range:
4792 *
4793 * This function should be used to scan large ranges of the allocation bitmap
4794 * at one time. It makes two key assumptions:
4795 *
4796 * 1) Bitmap lock is held during the duration of the call (exclusive)
4797 * 2) There are no pages in the buffer cache for any of the bitmap
4798 * blocks that we may encounter. It *MUST* be completely empty.
4799 *
4800 * The expected use case is when we are scanning the bitmap in full while we are
4801 * still mounting the filesystem in order to issue TRIMs or build up the summary
4802 * table for the mount point. It should be done after any potential journal replays
4803 * are completed and their I/Os fully issued.
4804 *
4805 * The key reason for assumption (2) above is that this function will try to issue
 * I/O against the bitmap file in chunks as large as possible -- essentially as
4807 * much as the buffer layer will handle (1MB). Because the size of these I/Os
4808 * is larger than what would be expected during normal runtime we must invalidate
4809 * the buffers as soon as we are done with them so that they do not persist in
4810 * the buffer cache for other threads to find, as they'll typically be doing
4811 * allocation-block size I/Os instead.
4812 *
4813 * Input Args:
4814 * hfsmp - hfs mount data structure
4815 * startbit - allocation block # to start our scan. It must be aligned
4816 * on a vcbVBMIOsize boundary.
4817 * list - journal trim list data structure for issuing TRIMs
4818 *
4819 * Output Args:
4820 * bitToScan - Return the next bit to scan if this function is called again.
4821 * Caller will supply this into the next invocation
4822 * of this call as 'startbit'.
4823 */
4824
4825 static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4826 u_int32_t *bitToScan, struct jnl_trim_list *list) {
4827
4828 int error;
4829 int readwrite = 1;
4830 u_int32_t curAllocBlock;
4831 struct buf *blockRef = NULL;
4832 u_int32_t *buffer = NULL;
4833 u_int32_t free_offset = 0; //tracks the start of the current free range
4834 u_int32_t size = 0; // tracks the length of the current free range.
4835 u_int32_t iosize = 0; //how much io we should generate against the bitmap
4836 u_int32_t byte_off; // byte offset into the bitmap file.
4837 u_int32_t completed_size; // how much io was actually completed
4838 u_int32_t last_bitmap_block;
4839 u_int32_t current_word;
4840 u_int32_t word_index = 0;
4841
4842 /* summary table building */
4843 uint32_t summary_bit = 0;
4844 uint32_t saw_free_blocks = 0;
4845 uint32_t last_marked = 0;
4846
4847 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4848 readwrite = 0;
4849 }
4850
4851 /*
4852 * Compute how much I/O we should generate here.
4853 * hfs_scan_range_size will validate that the start bit
4854 * converted into a byte offset into the bitmap file,
4855 * is aligned on a VBMIOSize boundary.
4856 */
4857 error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4858 if (error) {
4859 if (ALLOC_DEBUG) {
4860 panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4861 }
4862 return error;
4863 }
4864
4865 if (iosize < hfsmp->vcbVBMIOSize) {
4866 if (ALLOC_DEBUG) {
4867 panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4868 }
4869 return EINVAL;
4870 }
4871
4872 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
4873 byte_off = startbit / kBitsPerByte;
4874
4875 /*
4876 * When the journal replays blocks, it does so by writing directly to the disk
4877 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
4878 * it also immediately re-reads and invalidates the range covered by the bp so
4879 * it does not leave anything lingering in the cache (for iosize reasons).
4880 *
4881 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4882 *
4883 * NOTE: It is not recommended, but it is possible to call the function below
4884 * on sections of the bitmap that may be in core already as long as the pages are not
4885 * dirty. In that case, we'd notice that something starting at that
4886 * logical block of the bitmap exists in the metadata cache, and we'd check
4887 * if the iosize requested is the same as what was already allocated for it.
4888 * Odds are pretty good we're going to request something larger. In that case,
4889 * we just free the existing memory associated with the buf and reallocate a
4890 * larger range. This function should immediately invalidate it as soon as we're
4891 * done scanning, so this shouldn't cause any coherency issues.
4892 */
4893
4894 error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4895 if (error) {
4896 if (ALLOC_DEBUG) {
4897 panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4898 }
4899 return error;
4900 }
4901
4902 /*
4903 * At this point, we have a giant wired buffer that represents some portion of
4904 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
4905 * bytes though, so clip our ending bit to what we actually read in.
4906 */
4907 completed_size = buf_count(blockRef);
4908 last_bitmap_block = completed_size * kBitsPerByte;
4909 last_bitmap_block = last_bitmap_block + startbit;
4910
4911 /* Cap the last block to the total number of blocks if required */
4912 if (last_bitmap_block > hfsmp->totalBlocks) {
4913 last_bitmap_block = hfsmp->totalBlocks;
4914 }
4915
4916 /* curAllocBlock represents the logical block we're analyzing. */
4917 curAllocBlock = startbit;
4918 word_index = 0;
4919 size = 0;
4920
4921 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4922 if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4923 error = EINVAL;
4924 if (ALLOC_DEBUG) {
4925 panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4926 }
4927 return error;
4928 }
4929 /*
4930 * summary_bit should now be set to the summary bit corresponding to
4931 * the allocation block of the first bit that we're supposed to scan
4932 */
4933 }
4934 saw_free_blocks = 0;
4935
4936 while (curAllocBlock < last_bitmap_block) {
4937 u_int32_t bit;
4938
4939 /* Update the summary table as needed */
4940 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4941 if (ALLOC_DEBUG) {
4942 if (hfsmp->hfs_summary_table == NULL) {
4943 panic ("hfs_alloc_scan_range: no summary table!");
4944 }
4945 }
4946
4947 uint32_t temp_summary;
4948 error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4949 if (error) {
4950 if (ALLOC_DEBUG) {
4951 panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4952 }
4953 return EINVAL;
4954 }
4955
4956 if (ALLOC_DEBUG) {
4957 if (temp_summary < summary_bit) {
4958 panic ("hfs_alloc_scan_range: backwards summary bit?\n");
4959 }
4960 }
4961
4962 /*
4963 * If temp_summary is greater than summary_bit, then this
4964 * means that the next allocation block crosses a vcbVBMIOSize boundary
4965 * and we should treat this range of on-disk data as part of a new summary
4966 * bit.
4967 */
4968 if (temp_summary > summary_bit) {
4969 if (saw_free_blocks == 0) {
4970 /* Mark the bit as totally consumed in the summary table */
4971 hfs_set_summary (hfsmp, summary_bit, 1);
4972 }
4973 else {
4974 /* Mark the bit as potentially free in summary table */
4975 hfs_set_summary (hfsmp, summary_bit, 0);
4976 }
4977 last_marked = summary_bit;
4978 /*
4979 * Any time we set the summary table, update our counter which tracks
4980 * what the last bit that was fully marked in the summary table.
4981 *
4982 * Then reset our marker which says we haven't seen a free bit yet.
4983 */
4984 saw_free_blocks = 0;
4985 summary_bit = temp_summary;
4986 }
4987 } /* End summary table conditions */
4988
4989 current_word = SWAP_BE32(buffer[word_index]);
4990 /* Iterate through the word 1 bit at a time... */
4991 for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
4992 if (curAllocBlock >= last_bitmap_block) {
4993 break;
4994 }
4995 u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
4996
4997 if (allocated) {
4998 if (size != 0) {
4999 if (readwrite) {
5000 /* Insert the previously tracked range of free blocks to the trim list */
5001 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
5002 }
5003 add_free_extent_cache (hfsmp, free_offset, size);
5004 size = 0;
5005 free_offset = 0;
5006 }
5007 }
5008 else {
5009 /* Not allocated */
5010 size++;
5011 if (free_offset == 0) {
5012 /* Start a new run of free spcae at curAllocBlock */
5013 free_offset = curAllocBlock;
5014 }
5015 if (saw_free_blocks == 0) {
5016 saw_free_blocks = 1;
5017 }
5018 }
5019 } /* end for loop iterating through the word */
5020
5021 if (curAllocBlock < last_bitmap_block) {
5022 word_index++;
5023 }
5024
5025 } /* End while loop (iterates through last_bitmap_block) */
5026
5027
5028 /*
5029 * We've (potentially) completed our pass through this region of bitmap,
5030 * but one thing we may not have done is updated that last summary bit for
5031 * the last page we scanned, because we would have never transitioned across
5032 * a vcbVBMIOSize boundary again. Check for that and update the last bit
5033 * as needed.
5034 *
5035 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
5036 * for the region of bitmap on-disk that we were scanning. (it is one greater).
5037 */
5038 if ((curAllocBlock >= last_bitmap_block) &&
5039 (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
5040 uint32_t temp_summary;
5041 /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
5042 uint32_t temp_block = last_bitmap_block - 1;
5043 error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
5044 if (error) {
5045 if (ALLOC_DEBUG) {
5046 panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
5047 }
5048 return EINVAL;
5049 }
5050
5051 /* Did we already update this in the table? */
5052 if (temp_summary > last_marked) {
5053 if (saw_free_blocks == 0) {
5054 hfs_set_summary (hfsmp, temp_summary, 1);
5055 }
5056 else {
5057 hfs_set_summary (hfsmp, temp_summary, 0);
5058 }
5059 }
5060 }
5061
5062 /*
5063 * We may have been tracking a range of free blocks that hasn't been inserted yet.
5064 * Keep the logic for the TRIM and free extent separate from that of the summary
5065 * table management even though they are closely linked.
5066 */
5067 if (size != 0) {
5068 if (readwrite) {
5069 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
5070 }
5071 add_free_extent_cache (hfsmp, free_offset, size);
5072 }
5073
5074 /*
5075 * curAllocBlock represents the next block we need to scan when we return
5076 * to this function.
5077 */
5078 *bitToScan = curAllocBlock;
5079 ReleaseScanBitmapRange(blockRef);
5080
5081 return 0;
5082
5083 }
5084
5085
5086
5087 /*
5088 * Compute the maximum I/O size to generate against the bitmap file
5089 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
5090 *
5091 * Inputs:
5092 * hfsmp -- hfsmount to look at
5093 * bitmap_off -- bit offset into the bitmap file
5094 *
5095 * Outputs:
5096 * iosize -- iosize to generate.
5097 *
5098 * Returns:
5099 * 0 on success; EINVAL otherwise
5100 */
5101 static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
5102
5103 /*
5104 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
5105 * with 32 bit math in this function.
5106 */
5107
5108 uint32_t bitmap_len;
5109 uint32_t remaining_bitmap;
5110 uint32_t target_iosize;
5111 uint32_t bitmap_off;
5112
5113 /* Is this bit index not word aligned? If so, immediately fail. */
5114 if (bitmap_st % kBitsPerWord) {
5115 if (ALLOC_DEBUG) {
5116 panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
5117 }
5118 return EINVAL;
5119 }
5120
5121 /* bitmap_off is in bytes, not allocation blocks/bits */
5122 bitmap_off = bitmap_st / kBitsPerByte;
5123
5124 if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
5125 if (ALLOC_DEBUG) {
5126 panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
5127 }
5128 return EINVAL;
5129 }
5130
5131 /*
5132 * Also invalid if it's not at least aligned to HFS bitmap logical
5133 * block boundaries. We don't have to emit an iosize that's an
5134 * exact multiple of the VBMIOSize, but it must start on such
5135 * a boundary.
5136 *
5137 * The vcbVBMIOSize may be SMALLER than the allocation block size
5138 * on a FS with giant allocation blocks, but it will never be
5139 * greater than it, so it should be safe to start I/O
5140 * aligned on a VBMIOsize boundary.
5141 */
5142 if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
5143 if (ALLOC_DEBUG) {
5144 panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
5145 }
5146 return EINVAL;
5147 }
5148
5149 /*
5150 * Generate the total bitmap file length in bytes, then round up
5151 * that value to the end of the last allocation block, if needed (It
5152 * will probably be needed). We won't scan past the last actual
5153 * allocation block.
5154 *
5155 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
5156 * have to complete the I/O on VBMIOSize boundaries, but we can only read
5157 * up until the end of the bitmap file.
5158 */
5159 bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
5160
5161 remaining_bitmap = bitmap_len - bitmap_off;
5162
5163 /*
5164 * io size is the MIN of the maximum I/O we can generate or the
5165 * remaining amount of bitmap.
5166 */
5167 target_iosize = MIN((MAXBSIZE), remaining_bitmap);
5168 *iosize = target_iosize;
5169
5170 return 0;
5171 }
5172
5173
5174
5175
5176 /*
5177 * This function is basically the same as hfs_isallocated, except it's designed for
5178 * use with the red-black tree validation code. It assumes we're only checking whether
5179 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
5180 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
5181 *
5182 * This should not be called in general purpose scanning code.
5183 */
5184 int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
5185
5186 u_int32_t *currentWord; // Pointer to current word within bitmap block
5187 u_int32_t bitMask; // Word with given bits already set (ready to test)
5188 u_int32_t firstBit; // Bit index within word of first bit to allocate
5189 u_int32_t numBits; // Number of bits in word to allocate
5190 u_int32_t bitsPerBlock;
5191 uintptr_t blockRef = 0;
5192 u_int32_t numBlocks = 1;
5193 u_int32_t *buffer = NULL;
5194
5195 int inuse = 0;
5196 int error;
5197
5198
5199 if (bp_buf) {
5200 /* just use passed-in buffer if avail. */
5201 buffer = bp_buf;
5202 }
5203 else {
5204 /*
5205 * Pre-read the bitmap block containing the first word of allocation
5206 */
5207 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
5208 HFS_ALLOC_IGNORE_TENTATIVE);
5209 if (error)
5210 return (error);
5211 }
5212
5213 /*
5214 * Initialize currentWord, and wordsLeft.
5215 */
5216 u_int32_t wordIndexInBlock;
5217
5218 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
5219
5220 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
5221 currentWord = buffer + wordIndexInBlock;
5222
5223 /*
5224 * First test any non word aligned bits.
5225 */
5226 firstBit = startingBlock % kBitsPerWord;
5227 bitMask = kAllBitsSetInWord >> firstBit;
5228 numBits = kBitsPerWord - firstBit;
5229 if (numBits > numBlocks) {
5230 numBits = numBlocks;
5231 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
5232 }
5233 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
5234 inuse = 1;
5235 goto Exit;
5236 }
5237 ++currentWord;
5238
5239 Exit:
5240 if(bp_buf == NULL) {
5241 if (buffer) {
5242 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
5243 }
5244 }
5245 return (inuse);
5246
5247
5248
5249 }
5250
5251 /*
5252 * This function resets all of the data structures relevant to the
5253 * free extent cache stored in the hfsmount struct.
5254 *
5255 * If we are using the red-black tree code then we need to account for the fact that
5256 * we may encounter situations where we need to jettison the tree. If that is the
5257 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
5258 * the free ext cache is zeroed before we start using it.
5259 *
5260 * We also reset and disable the cache when allocLimit is updated... which
5261 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
5262 * It is independent of the type of allocator being used currently.
5263 */
5264 void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
5265 {
5266 int bytes;
5267 void *freeExt;
5268
5269 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5270 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
5271
5272 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5273
5274 /* reset Free Extent Count */
5275 hfsmp->vcbFreeExtCnt = 0;
5276
5277 /* reset the actual array */
5278 bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
5279 freeExt = (void*)(hfsmp->vcbFreeExt);
5280
5281 bzero (freeExt, bytes);
5282
5283 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5284
5285 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5286 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
5287
5288 return;
5289 }
5290
5291 /*
5292 * This function is used to inform the allocator if we have to effectively shrink
5293 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
5294 *
5295 * The bitmap lock must be held when calling this function. This function also modifies the
5296 * allocLimit field in the hfs mount point structure in the general case.
5297 *
5298 * In the shrinking case, we'll have to remove all free extents from the red-black
5299 * tree past the specified offset new_end_block. In the growth case, we'll have to force
5300 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
5301 *
5302 * new_end_block represents the total number of blocks available for allocation in the resized
5303 * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
5304 * will be out of the (0, n-1) range that are indexable in the bitmap.
5305 *
5306 * Returns 0 on success
5307 * errno on failure
5308 */
5309 u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
5310
5311 /*
5312 * Update allocLimit to the argument specified
5313 */
5314 hfsmp->allocLimit = new_end_block;
5315
5316 /* Invalidate the free extent cache completely so that
5317 * it does not have any extents beyond end of current
5318 * volume.
5319 */
5320 ResetVCBFreeExtCache(hfsmp);
5321
5322 /* Force a rebuild of the summary table. */
5323 (void) hfs_rebuild_summary (hfsmp);
5324
5325 // Delete any tentative ranges that are in the area we're shrinking
5326 struct rl_entry *range, *next_range;
5327 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
5328 rl_link, next_range) {
5329 if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP)
5330 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
5331 }
5332
5333 return 0;
5334 }
5335
5336 /*
5337 * Remove an extent from the list of free extents.
5338 *
 * This is a low-level routine.  It does not handle overlaps or splitting;
 * that is the responsibility of the caller.  The extent at the given index
 * is removed, and any following extents in the list are shifted down.
 *
 * Inputs:
 *	hfsmp - mount point structure
 *	index - index (within vcbFreeExt) of the extent to remove
 *
 * Result:
 *	None.  (Out-of-range indices are reported and ignored.)
5350 */
5351 static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
5352 {
5353 if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
5354 if (ALLOC_DEBUG)
5355 panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
5356 else
5357 printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
5358 return;
5359 }
5360 int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
5361 if (shift_count > 0) {
5362 memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
5363 }
5364 hfsmp->vcbFreeExtCnt--;
5365 }
5366
5367
5368 /*
5369 * Add an extent to the list of free extents.
5370 *
5371 * This is a low-level routine. It does not handle overlaps or coalescing;
5372 * that is the responsibility of the caller. This routine *does* make
5373 * sure that the extent it is adding is inserted in the correct location.
5374 * If the list is full, this routine will handle either removing the last
5375 * extent in the list to make room for the new extent, or ignoring the
5376 * new extent if it is "worse" than the last extent in the list.
5377 *
5378 * Inputs:
5379 * startBlock - Start of extent to add
5380 * blockCount - Number of blocks in extent to add
5381 *
5382 * Result:
5383 * The index where the extent that was inserted, or kMaxFreeExtents
5384 * if the extent was not inserted (the list was full, and the extent
5385 * being added was "worse" than everything in the list).
5386 */
5387 static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5388 {
5389 uint32_t i;
5390
5391 /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
5392 if (ALLOC_DEBUG) {
5393 uint32_t endBlock = startBlock + blockCount;
5394 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5395 if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
5396 startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
5397 continue;
5398 }
5399 panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
5400 hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
5401 }
5402 }
5403
5404 /* Figure out what index the new extent should be inserted at. */
5405 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5406 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5407 /* The list is sorted by increasing offset. */
5408 if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
5409 break;
5410 }
5411 } else {
5412 /* The list is sorted by decreasing size. */
5413 if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
5414 break;
5415 }
5416 }
5417 }
5418
5419 /* When we get here, i is the index where the extent should be inserted. */
5420 if (i == kMaxFreeExtents) {
5421 /*
5422 * The new extent is worse than anything already in the list,
5423 * and the list is full, so just ignore the extent to be added.
5424 */
5425 return i;
5426 }
5427
5428 /*
5429 * Grow the list (if possible) to make room for an insert.
5430 */
5431 if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
5432 hfsmp->vcbFreeExtCnt++;
5433
5434 /*
5435 * If we'll be keeping any extents after the insert position, then shift them.
5436 */
5437 int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
5438 if (shift_count > 0) {
5439 memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
5440 }
5441
5442 /* Finally, store the new extent at its correct position. */
5443 hfsmp->vcbFreeExt[i].startBlock = startBlock;
5444 hfsmp->vcbFreeExt[i].blockCount = blockCount;
5445 return i;
5446 }
5447
5448
5449 /*
5450 * Remove an entry from free extent cache after it has been allocated.
5451 *
5452 * This is a high-level routine. It handles removing a portion of a
5453 * cached extent, potentially splitting it into two (if the cache was
5454 * already full, throwing away the extent that would sort last). It
5455 * also handles removing an extent that overlaps multiple extents in
5456 * the cache.
5457 *
5458 * Inputs:
5459 * hfsmp - mount point structure
5460 * startBlock - starting block of the extent to be removed.
5461 * blockCount - number of blocks of the extent to be removed.
5462 */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	u_int32_t i, insertedIndex;
	u_int32_t currentStart, currentEnd, endBlock;
	int extentsRemoved = 0;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	/* One past the last block of the extent being removed. */
	endBlock = startBlock + blockCount;

	/* Takes the spin lock itself — caller must NOT already hold vcbFreeExtLock. */
	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Iterate over all of the extents in the free extent cache, removing or
	 * updating any entries that overlap with the input extent.
	 */
	for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
		currentStart = hfsmp->vcbFreeExt[i].startBlock;
		currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;

		/*
		 * If the current extent is entirely before or entirely after the
		 * the extent to be removed, then we keep it as-is.
		 */
		if (currentEnd <= startBlock || currentStart >= endBlock) {
			continue;
		}

		/*
		 * If the extent being removed entirely contains the current extent,
		 * then remove the current extent.
		 */
		if (startBlock <= currentStart && endBlock >= currentEnd) {
			remove_free_extent_list(hfsmp, i);

			/*
			 * We just removed the extent at index i.  The extent at
			 * index i+1 just got shifted to index i.  So decrement i
			 * to undo the loop's "++i", and the next iteration will
			 * examine index i again, which contains the next extent
			 * in the list.
			 */
			--i;
			++extentsRemoved;
			continue;
		}

		/*
		 * If the extent being removed is strictly "in the middle" of the
		 * current extent, then we need to split the current extent into
		 * two discontiguous extents (the "head" and "tail").  The good
		 * news is that we don't need to examine any other extents in
		 * the list.
		 */
		if (startBlock > currentStart && endBlock < currentEnd) {
			remove_free_extent_list(hfsmp, i);
			add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
			add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
			break;
		}

		/*
		 * The only remaining possibility is that the extent to be removed
		 * overlaps the start or end (but not both!) of the current extent.
		 * So we need to replace the current extent with a shorter one.
		 *
		 * The only tricky part is that the updated extent might be at a
		 * different index than the original extent.  If the updated extent
		 * was inserted after the current extent, then we need to re-examine
		 * the entry at index i, since it now contains the extent that was
		 * previously at index i+1.  If the updated extent was inserted
		 * before or at the same index as the removed extent, then the
		 * following extents haven't changed position.
		 */
		remove_free_extent_list(hfsmp, i);
		if (startBlock > currentStart) {
			/* Remove the tail of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
		} else {
			/* Remove the head of the current extent. */
			insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
		}
		if (insertedIndex > i) {
			--i;	/* Undo the "++i" in the loop, so we examine the entry at index i again. */
		}
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	/* Debug-only consistency check (no-op unless ALLOC_DEBUG is enabled). */
	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);

	return;
}
5560
5561
5562 /*
5563 * Add an entry to free extent cache after it has been deallocated.
5564 *
5565 * This is a high-level routine. It will merge overlapping or contiguous
5566 * extents into a single, larger extent.
5567 *
5568 * If the extent provided has blocks beyond current allocLimit, it is
5569 * clipped to allocLimit (so that we won't accidentally find and allocate
5570 * space beyond allocLimit).
5571 *
5572 * Inputs:
5573 * hfsmp - mount point structure
5574 * startBlock - starting block of the extent to be removed.
5575 * blockCount - number of blocks of the extent to be removed.
5576 *
5577 * Returns:
5578 * true - if the extent was added successfully to the list
5579 * false - if the extent was not added to the list, maybe because
5580 * the extent was beyond allocLimit, or is not best
5581 * candidate to be put in the cache.
5582 */
5583 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5584 {
5585 Boolean retval = false;
5586 uint32_t endBlock;
5587 uint32_t currentEnd;
5588 uint32_t i;
5589
5590 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5591 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
5592
5593 #if DEBUG
5594 for (i = 0; i < 2; ++i) {
5595 struct rl_entry *range;
5596 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
5597 hfs_assert(rl_overlap(range, startBlock,
5598 startBlock + blockCount - 1) == RL_NOOVERLAP);
5599 }
5600 }
5601 #endif
5602
5603 /* No need to add extent that is beyond current allocLimit */
5604 if (startBlock >= hfsmp->allocLimit) {
5605 goto out_not_locked;
5606 }
5607
5608 /* If end of the free extent is beyond current allocLimit, clip the extent */
5609 if ((startBlock + blockCount) > hfsmp->allocLimit) {
5610 blockCount = hfsmp->allocLimit - startBlock;
5611 }
5612
5613 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5614
5615 /*
5616 * Make a pass through the free extent cache, looking for known extents that
5617 * overlap or are contiguous with the extent to be added. We'll remove those
5618 * extents from the cache, and incorporate them into the new extent to be added.
5619 */
5620 endBlock = startBlock + blockCount;
5621 for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
5622 currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
5623 if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
5624 /* Extent i does not overlap and is not contiguous, so keep it. */
5625 continue;
5626 } else {
5627 /* We need to remove extent i and combine it with the input extent. */
5628 if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
5629 startBlock = hfsmp->vcbFreeExt[i].startBlock;
5630 if (currentEnd > endBlock)
5631 endBlock = currentEnd;
5632
5633 remove_free_extent_list(hfsmp, i);
5634 /*
5635 * We just removed the extent at index i. The extent at
5636 * index i+1 just got shifted to index i. So decrement i
5637 * to undo the loop's "++i", and the next iteration will
5638 * examine index i again, which contains the next extent
5639 * in the list.
5640 */
5641 --i;
5642 }
5643 }
5644 add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
5645
5646 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5647
5648 out_not_locked:
5649 sanity_check_free_ext(hfsmp, 0);
5650
5651 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5652 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
5653
5654 return retval;
5655 }
5656
5657 /* Debug function to check if the free extent cache is good or not */
5658 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
5659 {
5660 u_int32_t i, j;
5661
5662 /* Do not do anything if debug is not on */
5663 if (ALLOC_DEBUG == 0) {
5664 return;
5665 }
5666
5667 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5668
5669 if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
5670 panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
5671
5672 /*
5673 * Iterate the Free extent cache and ensure no entries are bogus or refer to
5674 * allocated blocks.
5675 */
5676 for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
5677 u_int32_t start, nblocks;
5678
5679 start = hfsmp->vcbFreeExt[i].startBlock;
5680 nblocks = hfsmp->vcbFreeExt[i].blockCount;
5681
5682 /* Check if any of the blocks in free extent cache are allocated.
5683 * This should not be enabled always because it might take
5684 * very long for large extents that get added to the list.
5685 *
5686 * We have to drop vcbFreeExtLock while we call hfs_isallocated
5687 * because it is going to do I/O. Note that the free extent
5688 * cache could change. That's a risk we take when using this
5689 * debugging code. (Another alternative would be to try to
5690 * detect when the free extent cache changed, and perhaps
5691 * restart if the list changed while we dropped the lock.)
5692 */
5693 if (check_allocated) {
5694 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5695 if (hfs_isallocated(hfsmp, start, nblocks)) {
5696 panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
5697 hfsmp, i, start, nblocks);
5698 }
5699 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5700 }
5701
5702 /* Check if any part of the extent is beyond allocLimit */
5703 if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
5704 panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
5705 hfsmp, i, start, nblocks, hfsmp->allocLimit);
5706 }
5707
5708 /* Check if there are any duplicate start blocks */
5709 for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
5710 if (start == hfsmp->vcbFreeExt[j].startBlock) {
5711 panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
5712 hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
5713 hfsmp->vcbFreeExt[j].blockCount);
5714 }
5715 }
5716
5717 /* Check if the entries are out of order */
5718 if ((i+1) != hfsmp->vcbFreeExtCnt) {
5719 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5720 /* sparse devices are sorted by starting block number (ascending) */
5721 if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
5722 panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5723 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5724 hfsmp->vcbFreeExt[i+1].blockCount);
5725 }
5726 } else {
5727 /* normally sorted by block count (descending) */
5728 if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
5729 panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5730 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5731 hfsmp->vcbFreeExt[i+1].blockCount);
5732 }
5733 }
5734 }
5735 }
5736 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5737 }
5738
/* All-ones 64-bit mask shifted right so the top `bit` positions are clear */
#define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit))

/*
 * Count of leading zero bits in a 64-bit value.  Unlike raw
 * __builtin_clzll (whose result is undefined for a zero argument),
 * this returns 64 when x == 0.
 */
static int clzll(uint64_t x)
{
	return (x == 0) ? 64 : __builtin_clzll(x);
}
5748
5749 #if !HFS_ALLOC_TEST
5750
5751 static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
5752 {
5753 uint32_t start_bit;
5754 uint32_t iosize = 0;
5755 uint32_t byte_offset;
5756 uint32_t last_bitmap_block;
5757 int error;
5758 struct hfsmount *hfsmp = bitmap_ctx->hfsmp;
5759 #if !HFS_ALLOC_TEST
5760 uint64_t lock_elapsed;
5761 #endif
5762
5763
5764 if (bitmap_ctx->bp)
5765 ReleaseScanBitmapRange(bitmap_ctx->bp);
5766
5767 if (msleep(NULL, NULL, PINOD | PCATCH,
5768 "hfs_fsinfo", NULL) == EINTR) {
5769 return EINTR;
5770 }
5771
5772 #if !HFS_ALLOC_TEST
5773 /*
5774 * Let someone else use the allocation map after we've processed over HFS_FSINFO_MAX_LOCKHELD_TIME .
5775 * lock_start is initialized in hfs_find_free_extents().
5776 */
5777 absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed);
5778
5779 if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) {
5780
5781 hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags);
5782
5783 /* add tsleep here to force context switch and fairness */
5784 tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1);
5785
5786 hfs_journal_lock(hfsmp);
5787
5788 /* Flush the journal and wait for all I/Os to finish up */
5789 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
5790 if (error) {
5791 hfs_journal_unlock(hfsmp);
5792 return error;
5793 }
5794
5795 /*
5796 * Take bitmap lock to ensure it is not being modified while journal is still held.
5797 * Since we are reading larger than normal blocks from the bitmap, which
5798 * might confuse other parts of the bitmap code using normal blocks, we
5799 * take exclusive lock here.
5800 */
5801 bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5802
5803 bitmap_ctx->lock_start = mach_absolute_time();
5804
5805 /* Release the journal lock */
5806 hfs_journal_unlock(hfsmp);
5807
5808 /*
5809 * Bitmap is read in large block size (up to 1MB),
5810 * unlike the runtime which reads the bitmap in the
5811 * 4K block size. If the bitmap is read by both ways
5812 * at the same time, it can result in multiple buf_t with
5813 * different sizes and potentially case data corruption.
5814 * To avoid this, we invalidate all the existing buffers
5815 * associated with the bitmap vnode.
5816 */
5817 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
5818 if (error) {
5819 /* hfs_systemfile_unlock will be called in the caller */
5820 return error;
5821 }
5822 }
5823 #endif
5824
5825 start_bit = bitmap_ctx->run_offset;
5826
5827 if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) {
5828 bitmap_ctx->chunk_end = 0;
5829 bitmap_ctx->bp = NULL;
5830 bitmap_ctx->bitmap = NULL;
5831 return 0;
5832 }
5833
5834 hfs_assert(start_bit % 8 == 0);
5835
5836 /*
5837 * Compute how much I/O we should generate here.
5838 * hfs_scan_range_size will validate that the start bit
5839 * converted into a byte offset into the bitmap file,
5840 * is aligned on a VBMIOSize boundary.
5841 */
5842 error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize);
5843 if (error)
5844 return error;
5845
5846 hfs_assert(iosize != 0);
5847
5848 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
5849 byte_offset = start_bit / kBitsPerByte;
5850
5851 /*
5852 * When the journal replays blocks, it does so by writing directly to the disk
5853 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
5854 * it also immediately re-reads and invalidates the range covered by the bp so
5855 * it does not leave anything lingering in the cache (for iosize reasons).
5856 *
5857 * As such, it is safe to do large I/Os here with ReadBitmapRange.
5858 *
5859 * NOTE: It is not recommended, but it is possible to call the function below
5860 * on sections of the bitmap that may be in core already as long as the pages are not
5861 * dirty. In that case, we'd notice that something starting at that
5862 * logical block of the bitmap exists in the metadata cache, and we'd check
5863 * if the iosize requested is the same as what was already allocated for it.
5864 * Odds are pretty good we're going to request something larger. In that case,
5865 * we just free the existing memory associated with the buf and reallocate a
5866 * larger range. This function should immediately invalidate it as soon as we're
5867 * done scanning, so this shouldn't cause any coherency issues.
5868 */
5869 error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp);
5870 if (error)
5871 return error;
5872
5873 /*
5874 * At this point, we have a giant wired buffer that represents some portion of
5875 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
5876 * bytes though, so clip our ending bit to what we actually read in.
5877 */
5878 last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte;
5879
5880 /* Cap the last block to the total number of blocks if required */
5881 if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks)
5882 last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks;
5883
5884 bitmap_ctx->chunk_current = 0; // new chunk of bitmap
5885 bitmap_ctx->chunk_end = last_bitmap_block - start_bit;
5886
5887 return 0;
5888 }
5889
5890 #endif // !HFS_ALLOC_TEST
5891
/*
 * Returns the number of contiguous SET bits beginning at bit `start`
 * (inclusive), scanning no further than bit `end` (exclusive).  Bit
 * positions are relative to `bitmap`, which is read as big-endian
 * 64-bit words (the on-disk HFS+ bitmap byte order).
 *
 * Technique: each word is byte-swapped to host order and complemented,
 * so a run of set bits becomes a run of leading zeros that clzll() can
 * count directly after masking off bits outside [start, end).
 */
static int bit_count_set(void *bitmap, int start, int end)
{
	if (start == end)
		return 0;

	hfs_assert(end > start);

	const int start_bit = start & 63;
	const int end_bit = end & 63;

	uint64_t *p = (uint64_t *)bitmap + start / 64;
	/* Complement so set bits read as zeros for clzll */
	uint64_t x = ~OSSwapBigToHostInt64(*p);

	if ((start & ~63) == (end & ~63)) {
		// Start and end in same 64 bits
		/* Force bits outside [start_bit, end_bit) to 1 so they stop the count */
		x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
		return clzll(x) - start_bit;
	}

	// Deal with initial unaligned bit
	x &= BIT_RIGHT_MASK(start_bit);

	if (x)
		/* Run ends within the first word */
		return clzll(x) - start_bit;

	// Go fast
	++p;
	int count = 64 - start_bit;
	/* Number of full 64-bit words strictly between the first (partial)
	 * word and the word containing `end`; the -1 with integer division
	 * absorbs the partial first word regardless of start_bit. */
	int nquads = (end - end_bit - start - 1) / 64;

	while (nquads--) {
		/* All-ones is byte-order invariant, so no swap needed to test
		 * for a fully-set word here */
		if (*p != 0xffffffffffffffffull) {
			x = ~OSSwapBigToHostInt64(*p);
			return count + clzll(x);
		}
		++p;
		count += 64;
	}

	/* Final partial word, if `end` is not 64-bit aligned */
	if (end_bit) {
		x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
		count += clzll(x);
	}

	return count;
}
5939
/* Returns the length of a run of CLEARED bits:
 * bitmap is a single chunk of memory being examined, read as
 * big-endian 64-bit words (the on-disk HFS+ bitmap byte order);
 * start: the start bit relative to the current buffer to be examined; start is inclusive.
 * end: the end bit relative to the current buffer to be examined; end is not inclusive.
 *
 * Mirror image of bit_count_set(): the word is NOT complemented, so a
 * run of clear bits is itself a run of leading zeros for clzll().
 */
static int bit_count_clr(void *bitmap, int start, int end)
{
	if (start == end)
		return 0;

	hfs_assert(end > start);

	const int start_bit = start & 63;
	const int end_bit = end & 63;

	uint64_t *p = (uint64_t *)bitmap + start / 64;
	uint64_t x = OSSwapBigToHostInt64(*p);

	if ((start & ~63) == (end & ~63)) {
		// Start and end in same 64 bits
		/* Force bits outside [start_bit, end_bit) to 1 so they stop the count */
		x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);

		return clzll(x) - start_bit;
	}

	// Deal with initial unaligned bit
	x &= BIT_RIGHT_MASK(start_bit);

	if (x)
		/* Run ends within the first word */
		return clzll(x) - start_bit;

	// Go fast
	++p;
	int count = 64 - start_bit;
	/* Number of full 64-bit words strictly between the first (partial)
	 * word and the word containing `end`; the -1 with integer division
	 * absorbs the partial first word regardless of start_bit. */
	int nquads = (end - end_bit - start - 1) / 64;

	while (nquads--) {
		/* Any non-zero word (in either byte order) ends the clear run */
		if (*p) {
			x = OSSwapBigToHostInt64(*p);
			return count + clzll(x);
		}
		++p;
		count += 64;
	}

	/* Final partial word, if `end` is not 64-bit aligned */
	if (end_bit) {
		x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);

		count += clzll(x);
	}

	return count;
}
5993
5994 #if !HFS_ALLOC_TEST
5995 static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set)
5996 {
5997 uint32_t end, start_summary_bit, end_summary_bit;
5998 errno_t error = 0;
5999
6000 if (count == 0)
6001 goto out;
6002
6003 if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE))
6004 return 0;
6005
6006 if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) {
6007 error = EINVAL;
6008 goto out;
6009 }
6010
6011 end = start + count - 1;
6012 if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) {
6013 error = EINVAL;
6014 goto out;
6015 }
6016
6017 // if summary table bit has been updated with free block previously, leave it.
6018 if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set)
6019 start_summary_bit++;
6020
6021 for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++)
6022 hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set);
6023
6024 if (!set)
6025 bitmap_ctx->last_free_summary_bit = end_summary_bit;
6026
6027 out:
6028 return error;
6029
6030 }
6031 #endif //!HFS_ALLOC_TEST
6032
6033 /*
6034 * Read in chunks of the bitmap into memory, and find a run of cleared/set bits;
6035 * the run can extend across chunk boundaries.
6036 * bit_count_clr can be passed to get a run of cleared bits.
6037 * bit_count_set can be passed to get a run of set bits.
6038 */
6039 static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count)
6040 {
6041 int count;
6042 errno_t error = 0;
6043
6044 *bit_count = 0;
6045
6046 do {
6047 if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) {
6048 if ((error = get_more_bits(bitmap_ctx)) != 0)
6049 goto out;
6050 }
6051
6052 if (bitmap_ctx->chunk_end == 0)
6053 break;
6054
6055 count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end);
6056
6057 bitmap_ctx->run_offset += count;
6058 bitmap_ctx->chunk_current += count;
6059 *bit_count += count;
6060
6061 } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count);
6062
6063 out:
6064 return error;
6065
6066 }
6067
// Returns count of number of bits clear: the length of the run of
// cleared (free) bits starting at the current scan position; the run
// may span multiple bitmap chunks.
static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count)
{
	return hfs_bit_count(bitmap_ctx, bit_count_clr, count);
}
6073
// Returns count of number of bits set: the length of the run of set
// (allocated) bits starting at the current scan position; the run may
// span multiple bitmap chunks.
static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count)
{
	return hfs_bit_count(bitmap_ctx, bit_count_set, count);
}
6079
// Returns the current scan position as an absolute bit number
// (i.e. allocation block number) within the bitmap.
static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx)
{
	return bitmap_ctx->run_offset;
}
6084
6085 /*
6086 * Perform a full scan of the bitmap file.
6087 * Note: during the scan of bitmap file, it may drop and reacquire the
6088 * bitmap lock to let someone else use the bitmap for fairness.
6089 * Currently it is used by HFS_GET_FSINFO statistic gathing, which
6090 * is run while other processes might perform HFS operations.
6091 */
6092
6093 errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
6094 void (*callback)(void *data, off_t free_extent_size), void *callback_arg)
6095 {
6096 struct bitmap_context bitmap_ctx;
6097 uint32_t count;
6098 errno_t error = 0;
6099
6100 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
6101 error = hfs_init_summary(hfsmp);
6102 if (error)
6103 return error;
6104 }
6105
6106 bzero(&bitmap_ctx, sizeof(struct bitmap_context));
6107
6108 /*
6109 * The journal maintains list of recently deallocated blocks to
6110 * issue DKIOCUNMAPs when the corresponding journal transaction is
6111 * flushed to the disk. To avoid any race conditions, we only
6112 * want one active trim list. Therefore we make sure that the
6113 * journal trim list is sync'ed, empty, and not modifiable for
6114 * the duration of our scan.
6115 *
6116 * Take the journal lock before flushing the journal to the disk.
6117 * We will keep on holding the journal lock till we don't get the
6118 * bitmap lock to make sure that no new journal transactions can
6119 * start. This will make sure that the journal trim list is not
6120 * modified after the journal flush and before getting bitmap lock.
6121 * We can release the journal lock after we acquire the bitmap
6122 * lock as it will prevent any further block deallocations.
6123 */
6124 hfs_journal_lock(hfsmp);
6125
6126 /* Flush the journal and wait for all I/Os to finish up */
6127 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
6128 if (error) {
6129 hfs_journal_unlock(hfsmp);
6130 return error;
6131 }
6132
6133 /*
6134 * Take bitmap lock to ensure it is not being modified.
6135 * Since we are reading larger than normal blocks from the bitmap, which
6136 * might confuse other parts of the bitmap code using normal blocks, we
6137 * take exclusive lock here.
6138 */
6139 bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6140
6141 #if !HFS_ALLOC_TEST
6142 bitmap_ctx.lock_start = mach_absolute_time();
6143 #endif
6144
6145 /* Release the journal lock */
6146 hfs_journal_unlock(hfsmp);
6147
6148 /*
6149 * Bitmap is read in large block size (up to 1MB),
6150 * unlike the runtime which reads the bitmap in the
6151 * 4K block size. If the bitmap is read by both ways
6152 * at the same time, it can result in multiple buf_t with
6153 * different sizes and potentially case data corruption.
6154 * To avoid this, we invalidate all the existing buffers
6155 * associated with the bitmap vnode.
6156 */
6157 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
6158 if (error)
6159 goto out;
6160
6161 /*
6162 * Get the list of all free extent ranges. hfs_alloc_scan_range()
6163 * will call hfs_fsinfo_data_add() to account for all the free
6164 * extent ranges found during scan.
6165 */
6166 bitmap_ctx.hfsmp = hfsmp;
6167 bitmap_ctx.run_offset = 0;
6168
6169 while (bitmap_ctx.run_offset < hfsmp->totalBlocks) {
6170
6171 uint32_t start = hfs_bit_offset(&bitmap_ctx);
6172
6173 if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0)
6174 goto out;
6175
6176 if (count)
6177 callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize));
6178
6179 if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0)
6180 goto out;
6181
6182 start = hfs_bit_offset(&bitmap_ctx);
6183
6184 if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0)
6185 goto out;
6186
6187 if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0)
6188 goto out;
6189 }
6190
6191 out:
6192 if (bitmap_ctx.lockflags) {
6193 hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags);
6194 }
6195
6196 return error;
6197 }
6198