]> git.saurik.com Git - apple/hfs.git/blob - core/VolumeAllocation.c
hfs-556.100.11.tar.gz
[apple/hfs.git] / core / VolumeAllocation.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 File: VolumeAllocation.c
30
31 Contains: Routines for accessing and modifying the volume bitmap.
32
33 Version: HFS Plus 1.0
34
35 Copyright: (c) 1996-2009 by Apple Inc., all rights reserved.
36
37 */
38
39 /*
40 Public routines:
41 BlockAllocate / hfs_block_alloc
42 Allocate space on a volume. Can allocate space contiguously.
43 If not contiguous, then allocation may be less than what was
44 asked for. Returns the starting block number, and number of
45 blocks. It will only return a single extent.
46
47 BlockDeallocate
48 Deallocate a contiguous run of allocation blocks.
49
50 BlockMarkAllocated
51 Exported wrapper to mark blocks as in-use. This will correctly determine
52 whether or not the red-black tree is enabled and call the appropriate function
53 if applicable.
54 BlockMarkFree
55 Exported wrapper to mark blocks as freed. This will correctly determine whether or
56 not the red-black tree is enabled and call the appropriate function if applicable.
57
58
59 ResetVCBFreeExtCache
60 Since the red-black tree obviates the need to maintain the free extent cache, we do
61 not update it if the tree is also live. As a result, if we ever need to destroy the trees
62 we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
63 bitmap scanning allocator.
64 We also reset and disable the free extent cache when volume resizing is
65 in flight.
66
67 UpdateAllocLimit
68 Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
69 allocations from occupying space in the region we are modifying during a filesystem resize.
70 At other times, it should be consistent with the total number of allocation blocks in the
71 filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
72 know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
73 number of items in the tree that we can allocate from.
74
75 ScanUnmapBlocks
76 Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it
77 tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core
78 summary table of the allocation bitmap.
79
80 Internal routines:
81 BlockMarkFreeInternal
82 Mark a contiguous range of blocks as free. The corresponding
83 bits in the volume bitmap will be cleared. This will actually do the work
84 of modifying the bitmap for us.
85
86 BlockMarkAllocatedInternal
87 Mark a contiguous range of blocks as allocated. The cor-
88 responding bits in the volume bitmap are set. Also tests to see
89 if any of the blocks were previously unallocated.
90 BlockFindContiguous
91 Find a contiguous range of blocks of a given size. The caller
92 specifies where to begin the search (by block number). The
93 block number of the first block in the range is returned. This is only
94 called by the bitmap scanning logic as the red-black tree should be able
95 to do this internally by searching its tree.
96 BlockFindAny
97 Find and allocate a contiguous range of blocks up to a given size. The
98 first range of contiguous free blocks found are allocated, even if there
99 are fewer blocks than requested (and even if a contiguous range of blocks
100 of the given size exists elsewhere).
101 BlockFindAnyBitmap
102 Finds a range of blocks per the above requirements without using the
103 Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
104 any valid range of free space needed.
105 BlockFindContig
106 Find a contiguous range of blocks of a given size.
107 If the minimum cannot be satisfied, nothing is
108 returned.
109 BlockFindKnown
110 Try to allocate space from known free space in the volume's
111 free extent cache.
112 ReadBitmapBlock
113 Given an allocation block number, read the bitmap block that
114 contains that allocation block into a caller-supplied buffer.
115
116 ReleaseBitmapBlock
117 Release a bitmap block back into the buffer cache.
118
119 ReadBitmapRange
120 Given an allocation block number, read a range of bitmap that
121 must begin at that allocation block into a caller supplied buffer.
122
123 ReleaseBitmapRange
124 Release and invalidate a buf_t corresponding to the bitmap
125 back into the UBC in order to prevent coherency issues.
126
127 remove_free_extent_cache
128 Remove an extent from the free extent cache. Handles overlaps
129 with multiple extents in the cache, and handles splitting an
130 extent in the cache if the extent to be removed is in the middle
131 of a cached extent.
132
133 add_free_extent_cache
134 Add an extent to the free extent cache. It will merge the
135 input extent with extents already in the cache.
136 CheckUnmappedBytes
137 Check whether or not the current transaction
138 has allocated blocks that were recently freed. This may have data safety implications.
139
140
141
142 Debug/Test Routines
143 hfs_isallocated
144 Test to see if any blocks in a range are allocated. Journal or
145 allocation file lock must be held.
146
147 hfs_isallocated_scan
148 Test to see if any blocks in a range are allocated. Releases and
149 invalidates the block used when finished.
150
151 Optimization Routines
152 hfs_alloc_scan_block
153 Given a starting allocation block number, figures out which physical block contains that
154 allocation block's bit, and scans it from the starting bit until either the ending bit or
155 the end of the block. Free space extents are inserted into the appropriate red-black tree.
156
157 */
158
159
160 #include <sys/types.h>
161 #include <sys/buf.h>
162
163 #if HFS_ALLOC_TEST
164 #define KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e) do {} while (0)
165 #else // !HFS_ALLOC_TEST
166
167 #include "hfs_macos_defs.h"
168 #include <sys/systm.h>
169 #include <sys/ubc.h>
170 /* For VM Page size */
171 #include <libkern/libkern.h>
172 #include "hfs_journal.h"
173 #include "hfs.h"
174 #include "hfs_endian.h"
175 #include "FileMgrInternal.h"
176
177 #endif // !HFS_ALLOC_TEST
178
179 #include <sys/sysctl.h>
180 #include <sys/disk.h>
181 #include <sys/uio.h>
182 #include <sys/malloc.h>
183
184 #include "hfs_dbg.h"
185 #include "hfs_format.h"
186 #include "hfs_kdebug.h"
187 #include "rangelist.h"
188 #include "hfs_extents.h"
189
190 /* Headers for unmap-on-mount support */
191 #include <sys/disk.h>
192
193 /*
194 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
195 * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
196 * disabled by default because there can be a lot of these events,
197 * and we don't want to overwhelm the kernel debug buffer. If you
198 * want to watch these events in particular, just set the sysctl.)
199 */
200 static int hfs_kdebug_allocation = 0;
201 SYSCTL_DECL(_vfs_generic);
202 HFS_SYSCTL(NODE, _vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system")
203 HFS_SYSCTL(NODE, _vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug")
204 HFS_SYSCTL(INT, _vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations")
205 enum {
206 /*
207 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
208 * BlockDeallocate, including the internal BlockAllocateXxx
209 * routines so we can see how an allocation was satisfied.
210 *
211 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
212 * free extent cache.
213 *
214 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
215 *
216 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
217 * or clearing bits, scanning the bitmap).
218 */
219 HFSDBG_ALLOC_ENABLED = 1,
220 HFSDBG_EXT_CACHE_ENABLED = 2,
221 HFSDBG_UNMAP_ENABLED = 4,
222 HFSDBG_BITMAP_ENABLED = 8
223 };
224
225 enum {
226 kBytesPerWord = 4,
227 kBitsPerByte = 8,
228 kBitsPerWord = 32,
229
230 kBitsWithinWordMask = kBitsPerWord-1
231 };
232
233 #define kLowBitInWordMask 0x00000001ul
234 #define kHighBitInWordMask 0x80000000ul
235 #define kAllBitsSetInWord 0xFFFFFFFFul
236
237 #define HFS_MIN_SUMMARY_BLOCKSIZE 4096
238
239 #define ALLOC_DEBUG 0
240
241 static OSErr ReadBitmapBlock(
242 ExtendedVCB *vcb,
243 u_int32_t bit,
244 u_int32_t **buffer,
245 uintptr_t *blockRef,
246 hfs_block_alloc_flags_t flags);
247
248 static OSErr ReleaseBitmapBlock(
249 ExtendedVCB *vcb,
250 uintptr_t blockRef,
251 Boolean dirty);
252
253 static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
254 HFSPlusExtentDescriptor *extent,
255 hfs_block_alloc_flags_t flags,
256 hfs_alloc_extra_args_t *ap);
257
258 static OSErr BlockFindAny(
259 ExtendedVCB *vcb,
260 u_int32_t startingBlock,
261 u_int32_t endingBlock,
262 u_int32_t maxBlocks,
263 hfs_block_alloc_flags_t flags,
264 Boolean trustSummary,
265 u_int32_t *actualStartBlock,
266 u_int32_t *actualNumBlocks);
267
268 static OSErr BlockFindAnyBitmap(
269 ExtendedVCB *vcb,
270 u_int32_t startingBlock,
271 u_int32_t endingBlock,
272 u_int32_t maxBlocks,
273 hfs_block_alloc_flags_t flags,
274 u_int32_t *actualStartBlock,
275 u_int32_t *actualNumBlocks);
276
277 static OSErr BlockFindContig(
278 ExtendedVCB *vcb,
279 u_int32_t startingBlock,
280 u_int32_t minBlocks,
281 u_int32_t maxBlocks,
282 hfs_block_alloc_flags_t flags,
283 u_int32_t *actualStartBlock,
284 u_int32_t *actualNumBlocks);
285
286 static OSErr BlockFindContiguous(
287 ExtendedVCB *vcb,
288 u_int32_t startingBlock,
289 u_int32_t endingBlock,
290 u_int32_t minBlocks,
291 u_int32_t maxBlocks,
292 Boolean useMetaZone,
293 Boolean trustSummary,
294 u_int32_t *actualStartBlock,
295 u_int32_t *actualNumBlocks,
296 hfs_block_alloc_flags_t flags);
297
298 static OSErr BlockFindKnown(
299 ExtendedVCB *vcb,
300 u_int32_t maxBlocks,
301 u_int32_t *actualStartBlock,
302 u_int32_t *actualNumBlocks);
303
304 static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
305 HFSPlusExtentDescriptor *extent,
306 uint32_t max_blocks,
307 hfs_block_alloc_flags_t flags);
308
309 static OSErr BlockMarkAllocatedInternal (
310 ExtendedVCB *vcb,
311 u_int32_t startingBlock,
312 u_int32_t numBlocks,
313 hfs_block_alloc_flags_t flags);
314
315 static OSErr BlockMarkFreeInternal(
316 ExtendedVCB *vcb,
317 u_int32_t startingBlock,
318 u_int32_t numBlocks,
319 Boolean do_validate);
320
321
322 static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
323 uint32_t **buffer, struct buf **blockRef);
324
325 static OSErr ReleaseScanBitmapRange( struct buf *bp );
326
327 static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
328 u_int32_t numBlocks, struct jnl_trim_list *list);
329
330 static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
331
332 static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
333 u_int32_t startbit,
334 u_int32_t *bitToScan,
335 struct jnl_trim_list *list);
336
337 static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
338 static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
339
340 /* Bitmap Re-use Detection */
341 static inline int extents_overlap (uint32_t start1, uint32_t len1,
342 uint32_t start2, uint32_t len2) {
343 return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) );
344 }
345
346
347 int hfs_isallocated_scan (struct hfsmount *hfsmp,
348 u_int32_t startingBlock,
349 u_int32_t *bp_buf);
350
351 /* Summary Table Functions */
352 static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
353 static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
354 static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
355 static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
356 static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
357 static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
358 static int hfs_rebuild_summary (struct hfsmount *hfsmp);
359
360 #if 0
361 static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
362 #endif
363
364 /* Used in external mount code to initialize the summary table */
365 int hfs_init_summary (struct hfsmount *hfsmp);
366
367 #if ALLOC_DEBUG
368 void hfs_validate_summary (struct hfsmount *hfsmp);
369 #endif
370
371
372 /* Functions for manipulating free extent cache */
373 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
374 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
375 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
376
377 static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
378
379 /* Functions for getting free exents */
380
381 typedef struct bitmap_context {
382 void *bitmap; // current bitmap chunk
383 uint32_t run_offset; // offset (in bits) from start of bitmap to start of current run
384 uint32_t chunk_current; // next bit to scan in the chunk
385 uint32_t chunk_end; // number of valid bits in this chunk
386 struct hfsmount *hfsmp;
387 struct buf *bp;
388 uint32_t last_free_summary_bit; // last marked free summary bit
389 int lockflags;
390 uint64_t lock_start;
391 } bitmap_context_t;
392
393
394 static errno_t get_more_bits(bitmap_context_t *bitmap_ctx);
395 static int bit_count_set(void *bitmap, int start, int end);
396 static int bit_count_clr(void *bitmap, int start, int end);
397 static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count);
398 static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count);
399 static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count);
400 static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set);
401 static int clzll(uint64_t x);
402
403 #if ALLOC_DEBUG
404 /*
405 * Validation Routine to verify that the TRIM list maintained by the journal
406 * is in good shape relative to what we think the bitmap should have. We should
407 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
408 * we panic.
409 */
410 int trim_validate_bitmap (struct hfsmount *hfsmp);
411 int trim_validate_bitmap (struct hfsmount *hfsmp) {
412 u_int64_t blockno_offset;
413 u_int64_t numblocks;
414 int i;
415 int count;
416 u_int32_t startblk;
417 u_int32_t blks;
418 int err = 0;
419 uint32_t alloccount = 0;
420
421 if (hfsmp->jnl) {
422 struct journal *jnl = (struct journal*)hfsmp->jnl;
423 if (jnl->active_tr) {
424 struct jnl_trim_list *trim = &(jnl->active_tr->trim);
425 count = trim->extent_count;
426 for (i = 0; i < count; i++) {
427 blockno_offset = trim->extents[i].offset;
428 blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
429 blockno_offset = blockno_offset / hfsmp->blockSize;
430 numblocks = trim->extents[i].length / hfsmp->blockSize;
431
432 startblk = (u_int32_t)blockno_offset;
433 blks = (u_int32_t) numblocks;
434 err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
435
436 if (err == 0 && alloccount != 0) {
437 panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
438 }
439 }
440 }
441 }
442 return 0;
443 }
444
445 #endif
446
447
448 /*
449 ;________________________________________________________________________________
450 ;
451 ; Routine: hfs_unmap_free_extent
452 ;
453 ; Function: Make note of a range of allocation blocks that should be
454 ; unmapped (trimmed). That is, the given range of blocks no
455 ; longer have useful content, and the device can unmap the
456 ; previous contents. For example, a solid state disk may reuse
457 ; the underlying storage for other blocks.
458 ;
459 ; This routine is only supported for journaled volumes. The extent
460 ; being freed is passed to the journal code, and the extent will
461 ; be unmapped after the current transaction is written to disk.
462 ;
463 ; Input Arguments:
464 ; hfsmp - The volume containing the allocation blocks.
465 ; startingBlock - The first allocation block of the extent being freed.
466 ; numBlocks - The number of allocation blocks of the extent being freed.
467 ;________________________________________________________________________________
468 */
469 static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
470 {
471 u_int64_t offset;
472 u_int64_t length;
473 u_int64_t device_sz;
474 int err = 0;
475
476 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
477 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
478
479 if (ALLOC_DEBUG) {
480 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
481 panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
482 }
483 }
484
485 if (hfsmp->jnl != NULL) {
486 device_sz = hfsmp->hfs_logical_bytes;
487 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
488 length = (u_int64_t) numBlocks * hfsmp->blockSize;
489
490 /* Validate that the trim is in a valid range of bytes */
491 if ((offset >= device_sz) || ((offset + length) > device_sz)) {
492 printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
493 err = EINVAL;
494 }
495
496 if (err == 0) {
497 err = journal_trim_add_extent(hfsmp->jnl, offset, length);
498 if (err) {
499 printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
500 }
501 }
502 }
503
504 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
505 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
506 }
507
508 /*
509 ;________________________________________________________________________________
510 ;
511 ; Routine: hfs_track_unmap_blocks
512 ;
513 ; Function: Make note of a range of allocation blocks that should be
514 ; unmapped (trimmed). That is, the given range of blocks no
515 ; longer have useful content, and the device can unmap the
516 ; previous contents. For example, a solid state disk may reuse
517 ; the underlying storage for other blocks.
518 ;
519 ; This routine is only supported for journaled volumes.
520 ;
521 ; *****NOTE*****:
522 ; This function should *NOT* be used when the volume is fully
523 ; mounted. This function is intended to support a bitmap iteration
524 ; at mount time to fully inform the SSD driver of the state of all blocks
525 ; at mount time, and assumes that there is no allocation/deallocation
526 ; interference during its iteration.,
527 ;
528 ; Input Arguments:
529 ; hfsmp - The volume containing the allocation blocks.
530 ; offset - The first allocation block of the extent being freed.
531 ; numBlocks - The number of allocation blocks of the extent being freed.
532 ; list - The list of currently tracked trim ranges.
533 ;________________________________________________________________________________
534 */
535 static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
536 u_int32_t numBlocks, struct jnl_trim_list *list) {
537
538 u_int64_t offset;
539 u_int64_t length;
540 int error = 0;
541
542 if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) {
543 int extent_no = list->extent_count;
544 offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
545 length = (u_int64_t) numBlocks * hfsmp->blockSize;
546
547
548 list->extents[extent_no].offset = offset;
549 list->extents[extent_no].length = length;
550 list->extent_count++;
551 if (list->extent_count == list->allocated_count) {
552 error = hfs_issue_unmap (hfsmp, list);
553 }
554 }
555
556 return error;
557 }
558
559 /*
560 ;________________________________________________________________________________
561 ;
562 ; Routine: hfs_issue_unmap
563 ;
564 ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
565 ;
566 ; Input Arguments:
567 ; hfsmp - The volume containing the allocation blocks.
568 ; list - The list of currently tracked trim ranges.
569 ;________________________________________________________________________________
570 */
571
572 static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
573 {
574 dk_unmap_t unmap;
575 int error = 0;
576
577 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
578 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
579 }
580
581 if (list->extent_count > 0 && list->extents != NULL) {
582 bzero(&unmap, sizeof(unmap));
583 unmap.extents = list->extents;
584 unmap.extentsCount = list->extent_count;
585
586 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
587 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
588 }
589
590 #if CONFIG_PROTECT
591 /*
592 * If we have not yet completed the first scan through the bitmap, then
593 * optionally inform the block driver below us that this is an initialization
594 * TRIM scan, if it can deal with this information.
595 */
596 if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
597 unmap.options |= _DK_UNMAP_INITIALIZE;
598 }
599 #endif
600 /* Issue a TRIM and flush them out */
601 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
602
603 bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
604 bzero (&unmap, sizeof(unmap));
605 list->extent_count = 0;
606 }
607
608 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
609 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
610 }
611
612 return error;
613 }
614
615 /*
616 ;________________________________________________________________________________
617 ;
618 ; Routine: hfs_unmap_alloc_extent
619 ;
620 ; Function: Make note of a range of allocation blocks, some of
621 ; which may have previously been passed to hfs_unmap_free_extent,
622 ; is now in use on the volume. The given blocks will be removed
623 ; from any pending DKIOCUNMAP.
624 ;
625 ; Input Arguments:
626 ; hfsmp - The volume containing the allocation blocks.
627 ; startingBlock - The first allocation block of the extent being allocated.
628 ; numBlocks - The number of allocation blocks being allocated.
629 ;________________________________________________________________________________
630 */
631
632 static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
633 {
634 u_int64_t offset;
635 u_int64_t length;
636 int err = 0;
637
638 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
639 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
640
641 if (hfsmp->jnl != NULL) {
642 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
643 length = (u_int64_t) numBlocks * hfsmp->blockSize;
644
645 err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
646 if (err) {
647 printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
648 }
649 }
650
651 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
652 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
653 }
654
655
656 /*
657 ;________________________________________________________________________________
658 ;
659 ; Routine: hfs_trim_callback
660 ;
661 ; Function: This function is called when a transaction that freed extents
662 ; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
663 ; written to the on-disk journal. This routine will add those
664 ; extents to the free extent cache so that they can be reused.
665 ;
666 ; CAUTION: This routine is called while the journal's trim lock
667 ; is held shared, so that no other thread can reuse any portion
668 ; of those extents. We must be very careful about which locks
669 ; we take from within this callback, to avoid deadlock. The
670 ; call to add_free_extent_cache will end up taking the cache's
671 ; lock (just long enough to add these extents to the cache).
672 ;
673 ; CAUTION: If the journal becomes invalid (eg., due to an I/O
674 ; error when trying to write to the journal), this callback
675 ; will stop getting called, even if extents got freed before
676 ; the journal became invalid!
677 ;
678 ; Input Arguments:
679 ; arg - The hfsmount of the volume containing the extents.
680 ; extent_count - The number of extents freed in the transaction.
681 ; extents - An array of extents (byte ranges) that were freed.
682 ;________________________________________________________________________________
683 */
684
685 void
686 hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
687 {
688 uint32_t i;
689 uint32_t startBlock, numBlocks;
690 struct hfsmount *hfsmp = arg;
691
692 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
693 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
694
695 for (i=0; i<extent_count; ++i) {
696 /* Convert the byte range in *extents back to a range of allocation blocks. */
697 startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
698 numBlocks = extents[i].length / hfsmp->blockSize;
699 (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
700 }
701
702 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
703 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
704 }
705
706
707 /*
708 ;________________________________________________________________________________
709 ;
710 ; Routine: CheckUnmappedBytes
711 ;
712 ; Function: From the specified inputs, determine if the extent in question overlaps
713 ; space that was recently freed, where the recently freed space may still be
714 ; lingering in an uncommitted journal transaction. This may have data safety
715 ; implications. The intended use is to decide whether or not to force a journal flush
716 ; before allowing file data I/O to be issued. If we did not do this
717 ; then it would be possible to issue the file I/O ahead of the
718 ; journal, resulting in data being overwritten if the transaction either
719 ; is not committed or cannot be replayed.
720 ;
721 ; NOTE: This function assumes that the journal and catalog/extent locks are held.
722 ;
723 ; Input Arguments:
724 ; hfsmp - The volume containing the allocation blocks.
725 ; foffset - start of the extent in question (in allocation blocks)
726 ; numbytes - number of blocks in the extent.
727 ; recently_freed: - output pointer containing whether or not the blocks were freed recently
728 ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks)
729 ;
730 ; Output:
731 ;
732 ; Returns 0 if we could determine extent validity for this (or a previous transaction)
733 ; Returns errno if there was an error
734 ;
735 ; If returned 0, then recently freed will contain a boolean that indicates
736 ; that it was recently freed.
737 ;________________________________________________________________________________
738 */
739
740 u_int32_t
741 CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
742 uint64_t device_offset;
743 uint64_t numbytes;
744 uint32_t err = 0;
745 uint64_t lba_overlap_end;
746
747 if (hfsmp->jnl != NULL) {
748 /*
749 * Convert the allocation block # and the number of blocks into device-relative
750 * offsets so that they can be compared using the TRIM list.
751 */
752 uint64_t device_sz = hfsmp->hfs_logical_bytes;
753 device_offset = blockno * ((uint64_t)hfsmp->blockSize);
754 device_offset += hfsmp->hfsPlusIOPosOffset;
755 numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
756
757 /*
758 * Since we check that the device_offset isn't too large, it's safe to subtract it
759 * from the size in the second check.
760 */
761 if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
762 return EINVAL;
763 }
764
765 /* Ask the journal if this extent overlaps with any pending TRIMs */
766 if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
767 *recently_freed = 1;
768
769 /* Convert lba_overlap_end back into allocation blocks */
770 uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
771 end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
772 *overlap_end = (uint32_t) end_offset;
773 }
774 else {
775 *recently_freed = 0;
776 }
777 err = 0;
778 }
779 else {
780 /* There may not be a journal. In that case, always return success. */
781 *recently_freed = 0;
782 }
783 return err;
784
785 }
786
787
788 /*
789 ;________________________________________________________________________________
790 ;
791 ; Routine: ScanUnmapBlocks
792 ;
793 ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
794 ; device as needed so that the underlying disk device is as
795 ; up-to-date as possible with which blocks are unmapped.
796 ; Additionally build up the summary table as needed.
797 ;
798 ; This function reads the bitmap in large block size
799 ; (up to 1MB) unlike the runtime which reads the bitmap
800 ; in 4K block size. So if this function is being called
801 ; after the volume is mounted and actively modified, the
802 ; caller needs to invalidate all of the existing buffers
803 ; associated with the bitmap vnode before calling this
804 ; function. If the buffers are not invalidated, it can
805 ; cause buf_t collision and potential data corruption.
806 ;
807 ; Input Arguments:
808 ; hfsmp - The volume containing the allocation blocks.
809 ;________________________________________________________________________________
810 */
811
812 u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
813 {
814 u_int32_t blocks_scanned = 0;
815 int error = 0;
816 struct jnl_trim_list trimlist;
817
818 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
819 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
820 }
821
822 /*
823 *struct jnl_trim_list {
824 uint32_t allocated_count;
825 uint32_t extent_count;
826 dk_extent_t *extents;
827 };
828 */
829 bzero (&trimlist, sizeof(trimlist));
830
831 /*
832 * Any trim related work should be tied to whether the underlying
833 * storage media supports UNMAP, as any solid state device would
834 * on desktop or embedded.
835 *
836 * We do this because we may want to scan the full bitmap on
837 * desktop for spinning media for the purposes of building up the
838 * summary table.
839 *
840 * We also avoid sending TRIMs down to the underlying media if the
841 * mount is read-only.
842 */
843
844 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
845 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
846 /* If the underlying device supports unmap and the mount is read-write, initialize */
847 int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
848 void *extents = hfs_malloc(alloc_count * sizeof(dk_extent_t));
849 trimlist.extents = (dk_extent_t*)extents;
850 trimlist.allocated_count = alloc_count;
851 trimlist.extent_count = 0;
852 }
853
854 while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
855
856 error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
857
858 if (error) {
859 printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
860 break;
861 }
862 }
863
864 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
865 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
866 if (error == 0) {
867 hfs_issue_unmap(hfsmp, &trimlist);
868 }
869 if (trimlist.extents) {
870 hfs_free(trimlist.extents, trimlist.allocated_count * sizeof(dk_extent_t));
871 }
872 }
873
874 /*
875 * This is in an #if block because hfs_validate_summary prototype and function body
876 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
877 */
878 #if ALLOC_DEBUG
879 sanity_check_free_ext(hfsmp, 1);
880 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
881 /* Validate the summary table too! */
882 hfs_validate_summary(hfsmp);
883 printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
884 }
885 #endif
886
887 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
888 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
889 }
890
891 return error;
892 }
893
894 static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start,
895 uint32_t count, int list,
896 struct rl_entry **reservation)
897 {
898 struct rl_entry *range, *next_range;
899
900 if (list == HFS_TENTATIVE_BLOCKS) {
901 int nranges = 0;
902 // Don't allow more than 4 tentative reservations
903 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
904 rl_link, next_range) {
905 if (++nranges > 3)
906 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
907 }
908 }
909
910 range = hfs_malloc(sizeof(*range));
911 range->rl_start = start;
912 range->rl_end = start + count - 1;
913 TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
914 *reservation = range;
915 }
916
917 static void hfs_release_reserved(hfsmount_t *hfsmp,
918 struct rl_entry *range,
919 int list)
920 {
921 if (range->rl_start == -1)
922 return;
923
924 TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
925
926 if (rl_len(range) > 0) {
927 if (list == HFS_TENTATIVE_BLOCKS)
928 hfsmp->tentativeBlocks -= rl_len(range);
929 else {
930 /*
931 * We don't need to unmap tentative blocks because we won't have
932 * written to them, but we might have written to reserved blocks.
933 * Nothing can refer to those blocks so this doesn't have to be
934 * via the journal. If this proves to be too expensive, we could
935 * consider not sending down the unmap or we could require this
936 * to always be called within a transaction and then we can use
937 * the journal.
938 */
939 dk_extent_t extent = {
940 .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize)
941 + hfsmp->hfsPlusIOPosOffset),
942 .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize)
943 };
944 dk_unmap_t unmap = {
945 .extents = &extent,
946 .extentsCount = 1,
947 };
948 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap,
949 0, vfs_context_kernel());
950 hfs_assert(hfsmp->lockedBlocks >= rl_len(range));
951 hfsmp->lockedBlocks -= rl_len(range);
952 }
953 hfs_release_summary(hfsmp, range->rl_start, rl_len(range));
954 add_free_extent_cache(hfsmp, range->rl_start, rl_len(range));
955 }
956
957 range->rl_start = -1;
958 range->rl_end = -2;
959 }
960
961 static void hfs_free_locked_internal(hfsmount_t *hfsmp,
962 struct rl_entry **reservation,
963 int list)
964 {
965 if (*reservation) {
966 hfs_release_reserved(hfsmp, *reservation, list);
967 hfs_free(*reservation, sizeof(**reservation));
968 *reservation = NULL;
969 }
970 }
971
972 void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
973 {
974 hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
975 }
976
977 void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
978 {
979 hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
980 }
981
982 OSErr BlockAllocate (
983 hfsmount_t *hfsmp, /* which volume to allocate space on */
984 u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
985 u_int32_t minBlocks, /* desired number of blocks to allocate */
986 u_int32_t maxBlocks, /* maximum number of blocks to allocate */
987 hfs_block_alloc_flags_t flags, /* option flags */
988 u_int32_t *actualStartBlock, /* actual first block of allocation */
989 u_int32_t *actualNumBlocks)
990 {
991 hfs_alloc_extra_args_t extra_args = {
992 .max_blocks = maxBlocks
993 };
994
995 HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
996
997 OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
998
999 *actualStartBlock = extent.startBlock;
1000 *actualNumBlocks = extent.blockCount;
1001
1002 return err;
1003 }
1004
1005 errno_t hfs_block_alloc(hfsmount_t *hfsmp,
1006 HFSPlusExtentDescriptor *extent,
1007 hfs_block_alloc_flags_t flags,
1008 hfs_alloc_extra_args_t *ap)
1009 {
1010 return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
1011 }
1012
1013 /*
1014 ;________________________________________________________________________________
1015 ;
1016 ; Routine: hfs_block_alloc_int
1017 ;
1018 ; Function: Allocate space on a volume. If contiguous allocation is requested,
1019 ; at least the requested number of bytes will be allocated or an
1020 ; error will be returned. If contiguous allocation is not forced,
1021 ; the space will be allocated with the first largest extent available
1022 ; at the requested starting allocation block. If there is not enough
1023 ; room there, a block allocation of less than the requested size will be
1024 ; allocated.
1025 ;
1026 ; If the requested starting block is 0 (for new file allocations),
1027 ; the volume's allocation block pointer will be used as a starting
1028 ; point.
1029 ;
1030 ; Input Arguments:
1031 ; hfsmp - Pointer to the HFS mount structure.
1032 ; extent - startBlock indicates the block to start
1033 ; searching from and blockCount is the number of
1034 ; blocks required. Depending on the flags used,
1035 ; more or less blocks may be returned. The
1036 ; allocated extent is returned via this
1037 ; parameter.
1038 ; flags - Flags to specify options like contiguous, use
1039 ; metadata zone, skip free block check, etc.
1040 ; ap - Additional arguments used depending on flags.
1041 ; See hfs_alloc_extra_args_t and below.
1042 ;
1043 ; Output:
1044 ; (result) - Error code, zero for successful allocation
1045 ; extent - If successful, the allocated extent.
1046 ;
1047 ; Side effects:
1048 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1049 ;
1050 ; HFS_ALLOC_TENTATIVE
1051 ; Blocks will be reserved but not marked allocated. They can be
1052 ; stolen if free space is limited. Tentative blocks can be used by
1053 ; passing HFS_ALLOC_USE_TENTATIVE and passing in the resevation.
1054 ; @ap->reservation_out is used to store the reservation.
1055 ;
1056 ; HFS_ALLOC_USE_TENTATIVE
1057 ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
1058 ; @ap->reservation_in should be set to whatever @ap->reservation_out
1059 ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative
1060 ; reservation was stolen, a normal allocation will take place.
1061 ;
1062 ; HFS_ALLOC_LOCKED
1063 ; Blocks will be reserved but not marked allocated. Unlike tentative
1064 ; reservations they cannot be stolen. It is safe to write to these
1065 ; blocks. @ap->reservation_out is used to store the reservation.
1066 ;
1067 ; HFS_ALLOC_COMMIT
1068 ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
1069 ; mark them allocated on disk. @ap->reservation_in is used.
1070 ;
1071 ; HFS_ALLOC_ROLL_BACK
1072 ; Take blocks that were just recently deallocated and mark them
1073 ; allocated. This is for roll back situations. Blocks got
1074 ; deallocated and then something went wrong and we need to roll back
1075 ; by marking the blocks allocated.
1076 ;
1077 ; HFS_ALLOC_FORCECONTIG
1078 ; It will not return fewer than @min_blocks.
1079 ;
1080 ; HFS_ALLOC_TRY_HARD
1081 ; We will perform an exhaustive search to try and find @max_blocks.
1082 ; It will not return fewer than @min_blocks.
1083 ;
1084 ;________________________________________________________________________________
1085 */
1086 OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
1087 HFSPlusExtentDescriptor *extent,
1088 hfs_block_alloc_flags_t flags,
1089 hfs_alloc_extra_args_t *ap)
1090 {
1091 u_int32_t freeBlocks;
1092 OSErr err = 0;
1093 Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
1094 Boolean forceContiguous = false;
1095 Boolean forceFlush;
1096
1097 uint32_t startingBlock = extent->startBlock;
1098 uint32_t minBlocks = extent->blockCount;
1099 uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;
1100
1101 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1102 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
1103
1104 if (ISSET(flags, HFS_ALLOC_COMMIT)) {
1105 extent->startBlock = (*ap->reservation_in)->rl_start;
1106 extent->blockCount = rl_len(*ap->reservation_in);
1107 goto mark_allocated;
1108 }
1109
1110 if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
1111 goto mark_allocated;
1112
1113 freeBlocks = hfs_freeblks(hfsmp, 0);
1114
1115 if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
1116 struct rl_entry *range = *ap->reservation_in;
1117
1118 if (range && range->rl_start != -1) {
1119 /*
1120 * It's possible that we have a tentative reservation
1121 * but there aren't enough free blocks due to loaned blocks
1122 * or insufficient space in the backing store.
1123 */
1124 uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks);
1125
1126 if (count >= minBlocks) {
1127 extent->startBlock = range->rl_start;
1128 extent->blockCount = count;
1129
1130 // Should we go straight to commit?
1131 if (!ISSET(flags, HFS_ALLOC_LOCKED))
1132 SET(flags, HFS_ALLOC_COMMIT);
1133
1134 goto mark_allocated;
1135 }
1136 }
1137
1138 /*
1139 * We can't use the tentative reservation so free it and allocate
1140 * normally.
1141 */
1142 hfs_free_tentative(hfsmp, ap->reservation_in);
1143 CLR(flags, HFS_ALLOC_USE_TENTATIVE);
1144 }
1145
1146 if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
1147 forceContiguous = true;
1148
1149 if (flags & HFS_ALLOC_FLUSHTXN) {
1150 forceFlush = true;
1151 }
1152 else {
1153 forceFlush = false;
1154 }
1155
1156 hfs_assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);
1157
1158 // See if we have to steal tentative blocks
1159 if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
1160 SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);
1161
1162 /* Skip free block check if blocks are being allocated for relocating
1163 * data during truncating a volume.
1164 *
1165 * During hfs_truncatefs(), the volume free block count is updated
1166 * before relocating data to reflect the total number of free blocks
1167 * that will exist on the volume after resize is successful. This
1168 * means that we have reserved allocation blocks required for relocating
1169 * the data and hence there is no need to check the free blocks.
1170 * It will also prevent resize failure when the number of blocks in
1171 * an extent being relocated is more than the free blocks that will
1172 * exist after the volume is resized.
1173 */
1174 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1175 // If the disk is already full, don't bother.
1176 if (freeBlocks == 0) {
1177 err = dskFulErr;
1178 goto exit;
1179 }
1180 if (forceContiguous && freeBlocks < minBlocks) {
1181 err = dskFulErr;
1182 goto exit;
1183 }
1184
1185 /*
1186 * Clip if necessary so we don't over-subscribe the free blocks.
1187 */
1188 if (minBlocks > freeBlocks) {
1189 minBlocks = freeBlocks;
1190 }
1191 if (maxBlocks > freeBlocks) {
1192 maxBlocks = freeBlocks;
1193 }
1194 }
1195
1196 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
1197 err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
1198 if (err)
1199 goto exit;
1200
1201 goto mark_allocated;
1202 }
1203
1204 //
1205 // If caller didn't specify a starting block number, then use the volume's
1206 // next block to allocate from.
1207 //
1208 if (startingBlock == 0) {
1209 hfs_lock_mount (hfsmp);
1210
1211 /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
1212 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1213 startingBlock = hfsmp->sparseAllocation;
1214 }
1215 else {
1216 startingBlock = hfsmp->nextAllocation;
1217 }
1218 hfs_unlock_mount(hfsmp);
1219 updateAllocPtr = true;
1220 }
1221
1222
1223 if (startingBlock >= hfsmp->allocLimit) {
1224 startingBlock = 0; /* overflow so start at beginning */
1225 }
1226
1227 //
1228 // If the request must be contiguous, then find a sequence of free blocks
1229 // that is long enough. Otherwise, find the first free block.
1230 //
1231 if (forceContiguous) {
1232 err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
1233 flags, &extent->startBlock, &extent->blockCount);
1234 /*
1235 * If we allocated from a new position then also update the roving allocator.
1236 * This will keep the roving allocation pointer up-to-date even
1237 * if we are using the new R/B tree allocator, since
1238 * it doesn't matter to us here, how the underlying allocator found
1239 * the block to vend out.
1240 */
1241 if ((err == noErr) &&
1242 (extent->startBlock > startingBlock) &&
1243 ((extent->startBlock < hfsmp->hfs_metazone_start) ||
1244 (extent->startBlock > hfsmp->hfs_metazone_end))) {
1245 updateAllocPtr = true;
1246 }
1247 } else {
1248 /*
1249 * Scan the bitmap once, gather the N largest free extents, then
1250 * allocate from these largest extents. Repeat as needed until
1251 * we get all the space we needed. We could probably build up
1252 * that list when the higher level caller tried (and failed) a
1253 * contiguous allocation first.
1254 *
1255 * Note that the free-extent cache will be cease to be updated if
1256 * we are using the red-black tree for allocations. If we jettison
1257 * the tree, then we will reset the free-extent cache and start over.
1258 */
1259
1260 /* Disable HFS_ALLOC_FLUSHTXN if needed */
1261 if (forceFlush) {
1262 flags &= ~HFS_ALLOC_FLUSHTXN;
1263 }
1264
1265 /*
1266 * BlockFindKnown only examines the free extent cache; anything in there will
1267 * have been committed to stable storage already.
1268 */
1269 err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
1270 &extent->blockCount);
1271
1272 /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */
1273
1274 if (err == dskFulErr) {
1275 /*
1276 * Now we have to do a bigger scan. Start at startingBlock and go up until the
1277 * allocation limit. We 'trust' the summary bitmap in this call, if it tells us
1278 * that it could not find any free space.
1279 */
1280 err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
1281 maxBlocks, flags, true,
1282 &extent->startBlock, &extent->blockCount);
1283 }
1284 if (err == dskFulErr) {
1285 /*
1286 * Vary the behavior here if the summary table is on or off.
1287 * If it is on, then we don't trust it it if we get into this case and
1288 * basically do a full scan for maximum coverage.
1289 * If it is off, then we trust the above and go up until the startingBlock.
1290 */
1291 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1292 err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
1293 flags, false,
1294 &extent->startBlock, &extent->blockCount);
1295 }
1296 else {
1297 err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
1298 flags, false,
1299 &extent->startBlock, &extent->blockCount);
1300 }
1301
1302 /*
1303 * Last Resort: Find/use blocks that may require a journal flush.
1304 */
1305 if (err == dskFulErr && forceFlush) {
1306 flags |= HFS_ALLOC_FLUSHTXN;
1307 err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
1308 flags, false,
1309 &extent->startBlock, &extent->blockCount);
1310 }
1311 }
1312 }
1313
1314 if (err)
1315 goto exit;
1316
1317 mark_allocated:
1318
1319 // Handle alignment
1320 if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
1321 /*
1322 * See the comment in FileMgrInternal.h for alignment
1323 * semantics.
1324 */
1325 uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
1326 % ap->alignment);
1327
1328 // @minBlocks is still the minimum
1329 if (extent->blockCount >= minBlocks + rounding)
1330 extent->blockCount -= rounding;
1331 }
1332
1333 err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
1334 extent->blockCount, flags);
1335
1336 if (err)
1337 goto exit;
1338
1339 if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0
1340 && !ISSET(flags, HFS_ALLOC_TENTATIVE)) {
1341 if (ISSET(flags, HFS_ALLOC_FAST_DEV)) {
1342 #if !HFS_ALLOC_TEST /* need this guard because this file is compiled outside of the kernel */
1343 hfs_pin_block_range(hfsmp, HFS_PIN_IT,
1344 extent->startBlock, extent->blockCount);
1345 #endif
1346 } else {
1347 _dk_cs_map_t cm = {
1348 .cm_extent = {
1349 (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize)
1350 + hfsmp->hfsPlusIOPosOffset),
1351 hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize)
1352 }
1353 };
1354
1355 errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP,
1356 (caddr_t)&cm, 0, vfs_context_current());
1357
1358 /*
1359 * Ignore errors for now; we are fully provisioned so in
1360 * theory CoreStorage should be able to handle this
1361 * allocation. Should we want to change this in future, then
1362 * we should think carefully how we handle errors. Allowing
1363 * CoreStorage to truncate our allocation is problematic
1364 * because we might have minimum and alignment requirements
1365 * and backing out changes we have already made is
1366 * non-trivial.
1367 */
1368
1369 if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) {
1370 printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n",
1371 err2, cm.cm_bytes_mapped);
1372 }
1373 }
1374 }
1375
1376 // if we actually allocated something then go update the
1377 // various bits of state that we maintain regardless of
1378 // whether there was an error (i.e. partial allocations
1379 // still need to update things like the free block count).
1380 //
1381 if (extent->blockCount != 0) {
1382 //
1383 // If we used the volume's roving allocation pointer, then we need to update it.
1384 // Adding in the length of the current allocation might reduce the next allocate
1385 // call by avoiding a re-scan of the already allocated space. However, the clump
1386 // just allocated can quite conceivably end up being truncated or released when
1387 // the file is closed or its EOF changed. Leaving the allocation pointer at the
1388 // start of the last allocation will avoid unnecessary fragmentation in this case.
1389 //
1390 hfs_lock_mount (hfsmp);
1391
1392 if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
1393 lck_spin_lock(&hfsmp->vcbFreeExtLock);
1394 if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
1395 hfsmp->sparseAllocation = extent->startBlock;
1396 }
1397 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
1398 if (extent->blockCount < hfsmp->hfs_freed_block_count) {
1399 hfsmp->hfs_freed_block_count -= extent->blockCount;
1400 } else {
1401 hfsmp->hfs_freed_block_count = 0;
1402 }
1403
1404 if (updateAllocPtr &&
1405 ((extent->startBlock < hfsmp->hfs_metazone_start) ||
1406 (extent->startBlock > hfsmp->hfs_metazone_end))) {
1407 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
1408 }
1409
1410 (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
1411 }
1412
1413 if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
1414 (*ap->reservation_in)->rl_start += extent->blockCount;
1415 hfsmp->tentativeBlocks -= extent->blockCount;
1416 if (rl_len(*ap->reservation_in) <= 0)
1417 hfs_free_tentative(hfsmp, ap->reservation_in);
1418 } else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
1419 // Handle committing locked extents
1420 hfs_assert(hfsmp->lockedBlocks >= extent->blockCount);
1421 (*ap->reservation_in)->rl_start += extent->blockCount;
1422 hfsmp->lockedBlocks -= extent->blockCount;
1423 hfs_free_locked(hfsmp, ap->reservation_in);
1424 }
1425
1426 /*
1427 * Update the number of free blocks on the volume
1428 *
1429 * Skip updating the free blocks count if the block are
1430 * being allocated to relocate data as part of hfs_truncatefs()
1431 */
1432
1433 if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
1434 hfsmp->tentativeBlocks += extent->blockCount;
1435 } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
1436 hfsmp->lockedBlocks += extent->blockCount;
1437 } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1438 hfsmp->freeBlocks -= extent->blockCount;
1439 }
1440 MarkVCBDirty(hfsmp);
1441 hfs_unlock_mount(hfsmp);
1442
1443 hfs_generate_volume_notifications(hfsmp);
1444
1445 if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
1446 hfs_assert(ap);
1447 add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
1448 0, ap->reservation_out);
1449 } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
1450 hfs_assert(ap);
1451 add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
1452 1, ap->reservation_out);
1453 }
1454
1455 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
1456 /*
1457 * See if we used tentative blocks. Note that we cannot
1458 * free the reservations here because we don't have access
1459 * to the external pointers. All we can do is update the
1460 * reservations and they'll be cleaned up when whatever is
1461 * holding the pointers calls us back.
1462 *
1463 * We use the rangelist code to detect overlaps and
1464 * constrain the tentative block allocation. Note that
1465 * @end is inclusive so that our rangelist code will
1466 * resolve the various cases for us. As a result, we need
1467 * to ensure that we account for it properly when removing
1468 * the blocks from the tentative count in the mount point
1469 * and re-inserting the remainder (either head or tail)
1470 */
1471 struct rl_entry *range, *next_range;
1472 struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
1473 const uint32_t start = extent->startBlock;
1474 const uint32_t end = start + extent->blockCount - 1;
1475 TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
1476 switch (rl_overlap(range, start, end)) {
1477 case RL_OVERLAPCONTAINSRANGE:
1478 // Keep the bigger part
1479 if (start - range->rl_start > range->rl_end - end) {
1480 // Discard the tail
1481 hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
1482 hfs_release_summary(hfsmp, end + 1, range->rl_end - end);
1483 const uint32_t old_end = range->rl_end;
1484 range->rl_end = start - 1;
1485 add_free_extent_cache(hfsmp, end + 1, old_end - end);
1486 } else {
1487 // Discard the head
1488 hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
1489 hfs_release_summary(hfsmp, range->rl_start,
1490 start - range->rl_start);
1491 const uint32_t old_start = range->rl_start;
1492 range->rl_start = end + 1;
1493 add_free_extent_cache(hfsmp, old_start,
1494 start - old_start);
1495 }
1496 hfs_assert(range->rl_end >= range->rl_start);
1497 break;
1498 case RL_MATCHINGOVERLAP:
1499 case RL_OVERLAPISCONTAINED:
1500 hfsmp->tentativeBlocks -= rl_len(range);
1501 range->rl_end = range->rl_start - 1;
1502 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
1503 break;
1504 case RL_OVERLAPSTARTSBEFORE:
1505 hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
1506 range->rl_end = start - 1;
1507 hfs_assert(range->rl_end >= range->rl_start);
1508 break;
1509 case RL_OVERLAPENDSAFTER:
1510 hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
1511 range->rl_start = end + 1;
1512 hfs_assert(range->rl_end >= range->rl_start);
1513 break;
1514 case RL_NOOVERLAP:
1515 break;
1516 }
1517 }
1518 }
1519 }
1520
1521 exit:
1522
1523 if (ALLOC_DEBUG) {
1524 if (err == noErr) {
1525 if (extent->startBlock >= hfsmp->totalBlocks) {
1526 panic ("BlockAllocate: vending invalid blocks!");
1527 }
1528 if (extent->startBlock >= hfsmp->allocLimit) {
1529 panic ("BlockAllocate: vending block past allocLimit!");
1530 }
1531
1532 if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) {
1533 panic ("BlockAllocate: vending too many invalid blocks!");
1534 }
1535
1536 if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) {
1537 panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
1538 }
1539 }
1540 }
1541
1542 if (err) {
1543 // Just to be safe...
1544 extent->startBlock = 0;
1545 extent->blockCount = 0;
1546 }
1547
1548 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1549 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0);
1550
1551 return err;
1552 }
1553
1554
1555 /*
1556 ;________________________________________________________________________________
1557 ;
1558 ; Routine: BlockDeallocate
1559 ;
1560 ; Function: Update the bitmap to deallocate a run of disk allocation blocks
1561 ;
1562 ; Input Arguments:
1563 ; vcb - Pointer to ExtendedVCB for the volume to free space on
1564 ; firstBlock - First allocation block to be freed
1565 ; numBlocks - Number of allocation blocks to free up (must be > 0!)
1566 ;
1567 ; Output:
1568 ; (result) - Result code
1569 ;
1570 ; Side effects:
1571 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1572 ; The Allocator's red-black trees may also be modified as a result.
1573 ;
1574 ;________________________________________________________________________________
1575 */
1576
1577 OSErr BlockDeallocate (
1578 ExtendedVCB *vcb, // Which volume to deallocate space on
1579 u_int32_t firstBlock, // First block in range to deallocate
1580 u_int32_t numBlocks, // Number of contiguous blocks to deallocate
1581 hfs_block_alloc_flags_t flags)
1582 {
1583 if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
1584 return 0;
1585
1586 OSErr err;
1587 struct hfsmount *hfsmp;
1588 hfsmp = VCBTOHFS(vcb);
1589
1590 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1591 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
1592
1593 //
1594 // If no blocks to deallocate, then exit early
1595 //
1596 if (numBlocks == 0) {
1597 err = noErr;
1598 goto Exit;
1599 }
1600
1601
1602 if (ALLOC_DEBUG) {
1603 if (firstBlock >= hfsmp->totalBlocks) {
1604 panic ("BlockDeallocate: freeing invalid blocks!");
1605 }
1606
1607 if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
1608 panic ("BlockDeallocate: freeing too many invalid blocks!");
1609 }
1610 }
1611
1612 /*
1613 * If we're using the summary bitmap, then try to mark the bits
1614 * as potentially usable/free before actually deallocating them.
1615 * It is better to be slightly speculative here for correctness.
1616 */
1617
1618 (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
1619
1620 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1621
1622 if (err) {
1623 goto Exit;
1624 }
1625
1626 //
1627 // Update the volume's free block count, and mark the VCB as dirty.
1628 //
1629 hfs_lock_mount(hfsmp);
1630 /*
1631 * Do not update the free block count. This flags is specified
1632 * when a volume is being truncated.
1633 */
1634 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1635 vcb->freeBlocks += numBlocks;
1636 }
1637
1638 vcb->hfs_freed_block_count += numBlocks;
1639
1640 if (vcb->nextAllocation == (firstBlock + numBlocks)) {
1641 HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
1642 }
1643
1644 if (hfsmp->jnl == NULL) {
1645 /*
1646 * In the journal case, we'll add the free extent once the journal
1647 * calls us back to tell us it wrote the transaction to disk.
1648 */
1649 (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
1650
1651 /*
1652 * If the journal case, we'll only update sparseAllocation once the
1653 * free extent cache becomes empty (when we remove the last entry
1654 * from the cache). Skipping it here means we're less likely to
1655 * find a recently freed extent via the bitmap before it gets added
1656 * to the free extent cache.
1657 */
1658 if (firstBlock < vcb->sparseAllocation) {
1659 vcb->sparseAllocation = firstBlock;
1660 }
1661 }
1662
1663 MarkVCBDirty(vcb);
1664 hfs_unlock_mount(hfsmp);
1665
1666 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1667 Exit:
1668
1669 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1670 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
1671
1672 return err;
1673 }
1674
1675
1676 u_int8_t freebitcount[16] = {
1677 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
1678 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
1679 };
1680
1681 u_int32_t
1682 MetaZoneFreeBlocks(ExtendedVCB *vcb)
1683 {
1684 u_int32_t freeblocks;
1685 u_int32_t *currCache;
1686 uintptr_t blockRef;
1687 u_int32_t bit;
1688 u_int32_t lastbit;
1689 int bytesleft;
1690 int bytesperblock;
1691 u_int8_t byte;
1692 u_int8_t *buffer;
1693
1694 blockRef = 0;
1695 bytesleft = freeblocks = 0;
1696 buffer = NULL;
1697 bit = VCBTOHFS(vcb)->hfs_metazone_start;
1698 if (bit == 1)
1699 bit = 0;
1700
1701 lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1702 bytesperblock = vcb->vcbVBMIOSize;
1703
1704 /*
1705 * Count all the bits from bit to lastbit.
1706 */
1707 while (bit < lastbit) {
1708 /*
1709 * Get next bitmap block.
1710 */
1711 if (bytesleft == 0) {
1712 if (blockRef) {
1713 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1714 blockRef = 0;
1715 }
1716 if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef,
1717 HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
1718 return (0);
1719 }
1720 buffer = (u_int8_t *)currCache;
1721 bytesleft = bytesperblock;
1722 }
1723 byte = *buffer++;
1724 freeblocks += freebitcount[byte & 0x0F];
1725 freeblocks += freebitcount[(byte >> 4) & 0x0F];
1726 bit += kBitsPerByte;
1727 --bytesleft;
1728 }
1729 if (blockRef)
1730 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1731
1732 return (freeblocks);
1733 }
1734
1735
1736 /*
1737 * Obtain the next allocation block (bit) that's
1738 * outside the metadata allocation zone.
1739 */
1740 static u_int32_t NextBitmapBlock(
1741 ExtendedVCB *vcb,
1742 u_int32_t bit)
1743 {
1744 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1745
1746 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1747 return (bit);
1748 /*
1749 * Skip over metadata allocation zone.
1750 */
1751 if ((bit >= hfsmp->hfs_metazone_start) &&
1752 (bit <= hfsmp->hfs_metazone_end)) {
1753 bit = hfsmp->hfs_metazone_end + 1;
1754 }
1755 return (bit);
1756 }
1757
1758
1759 // Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes.
1760 static void bits_set(void *bitmap, int start, int end)
1761 {
1762 const int start_bit = start & 63;
1763 const int end_bit = end & 63;
1764
1765 #define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
1766 #define RIGHT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)
1767
1768 uint64_t *p = (uint64_t *)bitmap + start / 64;
1769
1770 if ((start & ~63) == (end & ~63)) {
1771 // Start and end in same 64 bits
1772 *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
1773 } else {
1774 *p++ |= RIGHT_MASK(start_bit);
1775
1776 int nquads = (end - end_bit - start - 1) / 64;
1777
1778 while (nquads--)
1779 *p++ = 0xffffffffffffffffull;
1780
1781 if (end_bit)
1782 *p |= LEFT_MASK(end_bit);
1783 }
1784 }
1785
1786 // Modifies the buffer and applies any reservations that we might have
1787 static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset,
1788 hfs_block_alloc_flags_t flags,
1789 bool always_copy)
1790 {
1791 bool taken_copy = false;
1792 void *buffer = (void *)buf_dataptr(bp);
1793 const uint32_t nbytes = buf_count(bp);
1794 const off_t end = offset + nbytes * 8 - 1;
1795
1796 for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
1797 ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
1798 struct rl_entry *entry;
1799 TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
1800 uint32_t a, b;
1801
1802 enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);
1803
1804 if (overlap_type == RL_NOOVERLAP)
1805 continue;
1806
1807 /*
1808 * If always_copy is false, we only take a copy if B_LOCKED is
1809 * set because ReleaseScanBitmapRange doesn't invalidate the
1810 * buffer in that case.
1811 */
1812 if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
1813 buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
1814 buf_brelse(bp);
1815 bp = new_bp;
1816 buf_setflags(bp, B_NOCACHE);
1817 buffer = (void *)buf_dataptr(bp);
1818 taken_copy = true;
1819 }
1820
1821 switch (overlap_type) {
1822 case RL_OVERLAPCONTAINSRANGE:
1823 case RL_MATCHINGOVERLAP:
1824 memset(buffer, 0xff, nbytes);
1825 return bp;
1826 case RL_OVERLAPISCONTAINED:
1827 a = entry->rl_start;
1828 b = entry->rl_end;
1829 break;
1830 case RL_OVERLAPSTARTSBEFORE:
1831 a = offset;
1832 b = entry->rl_end;
1833 break;
1834 case RL_OVERLAPENDSAFTER:
1835 a = entry->rl_start;
1836 b = end;
1837 break;
1838 case RL_NOOVERLAP:
1839 __builtin_unreachable();
1840 }
1841
1842 a -= offset;
1843 b -= offset;
1844
1845 hfs_assert(a < buf_count(bp) * 8);
1846 hfs_assert(b < buf_count(bp) * 8);
1847 hfs_assert(b >= a);
1848
1849 // b is inclusive
1850 bits_set(buffer, a, b + 1);
1851 }
1852 } // for (;;)
1853
1854 return bp;
1855 }
1856
1857 /*
1858 ;_______________________________________________________________________
1859 ;
1860 ; Routine: ReadBitmapBlock
1861 ;
1862 ; Function: Read in a bitmap block corresponding to a given allocation
1863 ; block (bit). Return a pointer to the bitmap block.
1864 ;
1865 ; Inputs:
1866 ; vcb -- Pointer to ExtendedVCB
1867 ; bit -- Allocation block whose bitmap block is desired
1868 ;
1869 ; Outputs:
1870 ; buffer -- Pointer to bitmap block corresonding to "block"
1871 ; blockRef
1872 ;_______________________________________________________________________
1873 */
1874 static OSErr ReadBitmapBlock(ExtendedVCB *vcb,
1875 u_int32_t bit,
1876 u_int32_t **buffer,
1877 uintptr_t *blockRef,
1878 hfs_block_alloc_flags_t flags)
1879 {
1880 OSErr err;
1881 struct buf *bp = NULL;
1882 struct vnode *vp = NULL;
1883 daddr64_t block;
1884 u_int32_t blockSize;
1885
1886 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1887 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
1888
1889 /*
1890 * volume bitmap blocks are protected by the allocation file lock
1891 */
1892 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1893
1894 blockSize = (u_int32_t)vcb->vcbVBMIOSize;
1895 block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
1896
1897 /* HFS+ / HFSX */
1898 if (vcb->vcbSigWord != kHFSSigWord) {
1899 vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
1900 }
1901 #if CONFIG_HFS_STD
1902 else {
1903 /* HFS Standard */
1904 vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */
1905 block += vcb->vcbVBMSt; /* map to physical block */
1906 }
1907 #endif
1908
1909 err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
1910
1911 if (bp) {
1912 if (err) {
1913 buf_brelse(bp);
1914 *blockRef = 0;
1915 *buffer = NULL;
1916 } else {
1917 if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
1918 bp = process_reservations(vcb, bp, block * blockSize * 8,
1919 flags, /* always_copy: */ true);
1920 }
1921
1922 buf_setfsprivate(bp, (void *)(uintptr_t)flags);
1923
1924 *blockRef = (uintptr_t)bp;
1925 *buffer = (u_int32_t *)buf_dataptr(bp);
1926 }
1927 } else
1928 hfs_assert(err);
1929
1930 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1931 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
1932
1933 return err;
1934 }
1935
1936
1937 /*
1938 ;_______________________________________________________________________
1939 ;
1940 ; Routine: ReadBitmapRange
1941 ;
1942 ; Function: Read in a range of the bitmap starting at the given offset.
1943 ; Use the supplied size to determine the amount of I/O to generate
1944 ; against the bitmap file. Return a pointer to the bitmap block.
1945 ;
1946 ; Inputs:
1947 ; hfsmp -- Pointer to hfs mount
1948 ; offset -- byte offset into the bitmap file
1949 ; size -- How much I/O to generate against the bitmap file.
1950 ;
1951 ; Outputs:
1952 ; buffer -- Pointer to bitmap block data corresonding to "block"
1953 ; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call.
1954 ;_______________________________________________________________________
1955 */
1956 static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
1957 uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
1958 {
1959
1960 OSErr err;
1961 struct buf *bp = NULL;
1962 struct vnode *vp = NULL;
1963 daddr64_t block;
1964
1965 /* This function isn't supported for HFS standard */
1966 if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
1967 return EINVAL;
1968 }
1969
1970 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1971 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
1972 }
1973
1974 /*
1975 * volume bitmap blocks are protected by the allocation file lock
1976 */
1977 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
1978
1979 vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */
1980
1981 /*
1982 * The byte offset argument must be converted into bitmap-relative logical
1983 * block numbers before using it in buf_meta_bread.
1984 *
1985 * buf_meta_bread (and the things it calls) will eventually try to
1986 * reconstruct the byte offset into the file by multiplying the logical
1987 * block number passed in below by the vcbVBMIOSize field in the mount
1988 * point. So we prepare for that by converting the byte offset back into
1989 * logical blocks in terms of VBMIOSize units.
1990 *
1991 * The amount of I/O requested and the byte offset should be computed
1992 * based on the helper function in the frame that called us, so we can
1993 * get away with just doing a simple divide here.
1994 */
1995 block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
1996
1997 err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
1998
1999 if (bp) {
2000 if (err) {
2001 buf_brelse(bp);
2002 *blockRef = 0;
2003 *buffer = NULL;
2004 } else {
2005 bp = process_reservations(hfsmp, bp, (offset * 8), 0,
2006 /* always_copy: */ false);
2007
2008 *blockRef = bp;
2009 *buffer = (u_int32_t *)buf_dataptr(bp);
2010 }
2011 }
2012
2013 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2014 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
2015 }
2016
2017 return err;
2018 }
2019
2020
2021 /*
2022 ;_______________________________________________________________________
2023 ;
2024 ; Routine: ReleaseBitmapBlock
2025 ;
2026 ; Function: Relase a bitmap block.
2027 ;
2028 ; Inputs:
2029 ; vcb
2030 ; blockRef
2031 ; dirty
2032 ;_______________________________________________________________________
2033 */
2034 static OSErr ReleaseBitmapBlock(
2035 ExtendedVCB *vcb,
2036 uintptr_t blockRef,
2037 Boolean dirty)
2038 {
2039 struct buf *bp = (struct buf *)blockRef;
2040
2041 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2042 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
2043
2044 if (blockRef == 0) {
2045 if (dirty)
2046 panic("hfs: ReleaseBitmapBlock: missing bp");
2047 return (0);
2048 }
2049
2050 if (bp) {
2051 if (dirty) {
2052 hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp);
2053
2054 if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
2055 panic("Modified read-only bitmap buffer!");
2056
2057 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2058
2059 if (hfsmp->jnl) {
2060 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
2061 } else {
2062 buf_bdwrite(bp);
2063 }
2064 } else {
2065 buf_brelse(bp);
2066 }
2067 }
2068
2069 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2070 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
2071
2072 return (0);
2073 }
2074
2075 /*
2076 * ReleaseScanBitmapRange
2077 *
2078 * This is used to release struct bufs that were created for use by
2079 * bitmap scanning code. Because they may be of sizes different than the
2080 * typical runtime manipulation code, we want to force them to be purged out
2081 * of the buffer cache ASAP, so we'll release them differently than in the
2082 * ReleaseBitmapBlock case.
2083 *
2084 * Additionally, because we know that we're only reading the blocks and that they
2085 * should have been clean prior to reading them, we will never
2086 * issue a write to them (thus dirtying them).
2087 */
2088
2089 static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
2090
2091 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2092 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, 0, 0, 0, 0, 0);
2093 }
2094
2095 if (bp) {
2096 /* Mark the buffer invalid if it isn't locked, then release it */
2097 if ((buf_flags(bp) & B_LOCKED) == 0) {
2098 buf_markinvalid(bp);
2099 }
2100 buf_brelse(bp);
2101 }
2102
2103 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2104 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2105 }
2106
2107 return (0);
2108 }
2109
2110 /*
2111 * @extent.startBlock, on input, contains a preferred block for the
2112 * allocation. @extent.blockCount, on input, contains the minimum
2113 * number of blocks acceptable. Upon success, the result is conveyed
2114 * in @extent.
2115 */
2116 static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
2117 HFSPlusExtentDescriptor *extent,
2118 uint32_t max_blocks,
2119 hfs_block_alloc_flags_t flags)
2120 {
2121 OSErr err = dskFulErr;
2122
2123 const uint32_t min_blocks = extent->blockCount;
2124
2125 // It's > rather than >= because the last block is always reserved
2126 if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
2127 && hfsmp->allocLimit - extent->startBlock > max_blocks) {
2128 /*
2129 * This is just checking to see if there's an extent starting
2130 * at extent->startBlock that will suit. We only check for
2131 * @max_blocks here; @min_blocks is ignored.
2132 */
2133
2134 err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
2135 max_blocks, max_blocks, true, true,
2136 &extent->startBlock, &extent->blockCount, flags);
2137
2138 if (err != dskFulErr)
2139 return err;
2140 }
2141
2142 err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
2143 &extent->blockCount);
2144
2145 if (!err) {
2146 if (extent->blockCount >= max_blocks)
2147 return 0;
2148 } else if (err != dskFulErr)
2149 return err;
2150
2151 // Try a more exhaustive search
2152 return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
2153 min_blocks, max_blocks,
2154 /* useMetaZone: */ true,
2155 /* trustSummary: */ true,
2156 &extent->startBlock, &extent->blockCount, flags);
2157 }
2158
2159 /*
2160 _______________________________________________________________________
2161
2162 Routine: BlockFindContig
2163
2164 Function: Find a contiguous group of allocation blocks. If the
2165 minimum cannot be satisfied, nothing is returned. The
2166 caller guarantees that there are enough free blocks
2167 (though they may not be contiguous, in which case this
2168 call will fail).
2169
2170 Inputs:
2171 vcb Pointer to volume where space is to be allocated
2172 startingBlock Preferred first block for allocation
2173 minBlocks Minimum number of contiguous blocks to allocate
2174 maxBlocks Maximum number of contiguous blocks to allocate
2175 flags
2176
2177 Outputs:
2178 actualStartBlock First block of range allocated, or 0 if error
2179 actualNumBlocks Number of blocks allocated, or 0 if error
2180 _______________________________________________________________________
2181 */
2182 static OSErr BlockFindContig(
2183 ExtendedVCB *vcb,
2184 u_int32_t startingBlock,
2185 u_int32_t minBlocks,
2186 u_int32_t maxBlocks,
2187 hfs_block_alloc_flags_t flags,
2188 u_int32_t *actualStartBlock,
2189 u_int32_t *actualNumBlocks)
2190 {
2191 OSErr retval = noErr;
2192 uint32_t currentStart = startingBlock;
2193
2194 uint32_t foundStart = 0; // values to emit to caller
2195 uint32_t foundCount = 0;
2196
2197 uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this
2198 uint32_t collision_count = 0;
2199
2200 int err;
2201 int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
2202 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
2203
2204 int recently_deleted = 0;
2205 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2206
2207 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2208 KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
2209
2210 while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
2211
2212 /* Try and find something that works. */
2213
2214 /*
2215 * NOTE: If the only contiguous free extent of at least minBlocks
2216 * crosses startingBlock (i.e. starts before, ends after), then we
2217 * won't find it. Earlier versions *did* find this case by letting
2218 * the second search look past startingBlock by minBlocks. But
2219 * with the free extent cache, this can lead to duplicate entries
2220 * in the cache, causing the same blocks to be allocated twice.
2221 */
2222 retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
2223 maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);
2224
2225 if (retval == dskFulErr && currentStart != 0) {
2226 /*
2227 * We constrain the endingBlock so we don't bother looking for ranges
2228 * that would overlap those found in the previous call, if the summary bitmap
2229 * is not on for this volume. If it is, then we assume that it was not trust
2230 * -worthy and do a full scan.
2231 */
2232 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2233 retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
2234 maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
2235 }
2236 else {
2237 retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
2238 maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
2239 }
2240 }
2241
2242 if (retval != noErr) {
2243 goto bailout;
2244 }
2245
2246 /* Do we overlap with the recently found collision extent? */
2247 if (collision_start) {
2248 if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
2249 /*
2250 * We've looped around, and the only thing we could use was the collision extent.
2251 * Since we are allowed to use it, go ahead and do so now.
2252 */
2253 if(allowReuse) {
2254 /*
2255 * then we couldn't find anything except values which might have been
2256 * recently deallocated. just return our cached value if we are allowed to.
2257 */
2258 foundStart = collision_start;
2259 foundCount = collision_count;
2260 goto bailout;
2261 }
2262 else {
2263 /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
2264 retval = dskFulErr;
2265 goto bailout;
2266 }
2267 }
2268 }
2269
2270 /* OK, we know we must not have collided . See if this one is recently deleted */
2271 if (hfsmp->jnl) {
2272 recently_deleted = 0;
2273 uint32_t nextStart;
2274 err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
2275 (uint64_t) foundCount, &recently_deleted, &nextStart);
2276 if (err == 0) {
2277 if(recently_deleted != 0) {
2278 /*
2279 * these blocks were recently deleted/deallocated. Cache the extent, but
2280 * but keep searching to see if we can find one that won't collide here.
2281 */
2282 if (collision_start == 0) {
2283 collision_start = foundStart;
2284 collision_count = foundCount;
2285 }
2286 recently_deleted = 0;
2287
2288 /*
2289 * advance currentStart to the point just past the overlap we just found. Note that
2290 * we will automatically loop around to start of the bitmap as needed.
2291 */
2292 currentStart = nextStart;
2293 /* Unset foundStart/Count to allow it to loop around again. */
2294 foundStart = 0;
2295 foundCount = 0;
2296 }
2297 }
2298 } // end jnl/deleted case
2299
2300 /*
2301 * If we found something good, we'd break out of the loop at the top; foundCount
2302 * and foundStart should be set.
2303 */
2304
2305 } // end while loop.
2306
2307 bailout:
2308
2309 if (retval == noErr) {
2310 *actualStartBlock = foundStart;
2311 *actualNumBlocks = foundCount;
2312 }
2313
2314 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2315 KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0);
2316
2317 return retval;
2318
2319 }
2320
2321
2322 /*
2323 _______________________________________________________________________
2324
2325 Routine: BlockFindAny
2326
2327 Function: Find one or more allocation blocks and may return fewer than
2328 requested. The caller guarantees that there is at least one
2329 free block.
2330
2331 Inputs:
2332 vcb Pointer to volume where space is to be allocated
2333 startingBlock Preferred first block for allocation
2334 endingBlock Last block to check + 1
2335 maxBlocks Maximum number of contiguous blocks to allocate
2336 useMetaZone
2337
2338 Outputs:
2339 actualStartBlock First block of range allocated, or 0 if error
2340 actualNumBlocks Number of blocks allocated, or 0 if error
2341 _______________________________________________________________________
2342 */
2343
2344 static OSErr BlockFindAny(
2345 ExtendedVCB *vcb,
2346 u_int32_t startingBlock,
2347 register u_int32_t endingBlock,
2348 u_int32_t maxBlocks,
2349 hfs_block_alloc_flags_t flags,
2350 Boolean trustSummary,
2351 u_int32_t *actualStartBlock,
2352 u_int32_t *actualNumBlocks)
2353 {
2354
2355 /*
2356 * If it is enabled, scan through the summary table to find the first free block.
2357 *
2358 * If it reports that there are not any free blocks, we could have a false
2359 * positive, so in that case, use the input arguments as a pass through.
2360 */
2361 uint32_t start_blk = startingBlock;
2362 uint32_t end_blk = endingBlock;
2363 struct hfsmount *hfsmp;
2364 OSErr err;
2365
2366 hfsmp = (struct hfsmount*)vcb;
2367 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2368 uint32_t suggested_start;
2369
2370 /*
2371 * If the summary table is enabled, scan through it to find the first free
2372 * block. If there was an error, or we couldn't find anything free in the
2373 * summary table, then just leave the start_blk fields unmodified. We wouldn't
2374 * have gotten to this point if the mount point made it look like there was possibly
2375 * free space in the FS.
2376 */
2377 err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
2378 if (err == 0) {
2379 start_blk = suggested_start;
2380 }
2381 else {
2382 /* Differentiate between ENOSPC and a more esoteric error in the above call. */
2383 if ((err == ENOSPC) && (trustSummary)) {
2384 /*
2385 * The 'trustSummary' argument is for doing a full scan if we really
2386 * really, need the space and we think it's somewhere but can't find it in the
2387 * summary table. If it's true, then we trust the summary table and return
2388 * dskFulErr if we couldn't find it above.
2389 */
2390 return dskFulErr;
2391 }
2392 /*
2393 * If either trustSummary was false or we got a different errno, then we
2394 * want to fall through to the real bitmap single i/o code...
2395 */
2396 }
2397 }
2398
2399 err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
2400 flags, actualStartBlock, actualNumBlocks);
2401
2402 return err;
2403 }
2404
2405
2406 /*
2407 * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
2408 * figure out where the free allocation blocks are. Inputs and
2409 * outputs are the same as for BlockFindAny.
2410 */
2411
2412 static OSErr BlockFindAnyBitmap(
2413 ExtendedVCB *vcb,
2414 u_int32_t startingBlock,
2415 register u_int32_t endingBlock,
2416 u_int32_t maxBlocks,
2417 hfs_block_alloc_flags_t flags,
2418 u_int32_t *actualStartBlock,
2419 u_int32_t *actualNumBlocks)
2420 {
2421 OSErr err;
2422 register u_int32_t block = 0; // current block number
2423 register u_int32_t currentWord; // Pointer to current word within bitmap block
2424 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2425 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2426 u_int32_t *buffer = NULL;
2427 u_int32_t *currCache = NULL;
2428 uintptr_t blockRef = 0;
2429 u_int32_t bitsPerBlock;
2430 u_int32_t wordsPerBlock;
2431 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2432 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
2433 Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);
2434
2435 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2436 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
2437
2438 restartSearchAny:
2439
2440 /*
2441 * When we're skipping the metadata zone and the start/end
2442 * range overlaps with the metadata zone then adjust the
2443 * start to be outside of the metadata zone. If the range
2444 * is entirely inside the metadata zone then we can deny the
2445 * request (dskFulErr).
2446 */
2447 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
2448 if (startingBlock <= vcb->hfs_metazone_end) {
2449 if (endingBlock > (vcb->hfs_metazone_end + 2))
2450 startingBlock = vcb->hfs_metazone_end + 1;
2451 else {
2452 err = dskFulErr;
2453 goto Exit;
2454 }
2455 }
2456 }
2457
2458 // Since this routine doesn't wrap around
2459 if (maxBlocks > (endingBlock - startingBlock)) {
2460 maxBlocks = endingBlock - startingBlock;
2461 }
2462
2463 //
2464 // Pre-read the first bitmap block
2465 //
2466 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
2467 if (err != noErr) goto Exit;
2468 buffer = currCache;
2469
2470 //
2471 // Set up the current position within the block
2472 //
2473 {
2474 u_int32_t wordIndexInBlock;
2475
2476 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2477 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2478
2479 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2480 buffer += wordIndexInBlock;
2481 wordsLeft = wordsPerBlock - wordIndexInBlock;
2482 currentWord = SWAP_BE32 (*buffer);
2483 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
2484 }
2485
2486 /*
2487 * While loop 1:
2488 * Find the first unallocated block starting at 'block'
2489 */
2490 uint32_t summary_block_scan = 0;
2491
2492 block=startingBlock;
2493 while (block < endingBlock) {
2494 if ((currentWord & bitMask) == 0)
2495 break;
2496
2497 // Next bit
2498 ++block;
2499 bitMask >>= 1;
2500 if (bitMask == 0) {
2501 // Next word
2502 bitMask = kHighBitInWordMask;
2503 ++buffer;
2504
2505 if (--wordsLeft == 0) {
2506 // Next block
2507 buffer = currCache = NULL;
2508 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2509 /*
2510 * If summary_block_scan is non-zero, then we must have
2511 * pulled a bitmap file block into core, and scanned through
2512 * the entire thing. Because we're in this loop, we are
2513 * implicitly trusting that the bitmap didn't have any knowledge
2514 * about this particular block. As a result, update the bitmap
2515 * (lazily, now that we've scanned it) with our findings that
2516 * this particular block is completely used up.
2517 */
2518 if (summary_block_scan != 0) {
2519 uint32_t summary_bit;
2520 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
2521 hfs_set_summary (hfsmp, summary_bit, 1);
2522 }
2523 }
2524
2525 err = ReleaseBitmapBlock(vcb, blockRef, false);
2526 if (err != noErr) goto Exit;
2527
2528 /*
2529 * Skip over metadata blocks.
2530 */
2531 if (!useMetaZone) {
2532 block = NextBitmapBlock(vcb, block);
2533 }
2534 if (block >= endingBlock) {
2535 err = dskFulErr;
2536 goto Exit;
2537 }
2538
2539 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
2540 if (err != noErr) goto Exit;
2541 buffer = currCache;
2542 summary_block_scan = block;
2543 wordsLeft = wordsPerBlock;
2544 }
2545 currentWord = SWAP_BE32 (*buffer);
2546 }
2547 }
2548
2549 // Did we get to the end of the bitmap before finding a free block?
2550 // If so, then couldn't allocate anything.
2551 if (block >= endingBlock) {
2552 err = dskFulErr;
2553 goto Exit;
2554 }
2555
2556
2557 /*
2558 * Don't move forward just yet. Verify that either one of the following
2559 * two conditions is true:
2560 * 1) journaling is not enabled
2561 * 2) block is not currently on any pending TRIM list.
2562 */
2563 if (hfsmp->jnl != NULL && (forceFlush == false)) {
2564 int recently_deleted = 0;
2565 uint32_t nextblk;
2566 err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
2567 if ((err == 0) && (recently_deleted)) {
2568
2569 /* release the bitmap block & unset currCache. we may jump past it. */
2570 err = ReleaseBitmapBlock(vcb, blockRef, false);
2571 currCache = NULL;
2572 if (err != noErr) {
2573 goto Exit;
2574 }
2575 /* set our start to nextblk, and re-do the search. */
2576 startingBlock = nextblk;
2577 goto restartSearchAny;
2578 }
2579 }
2580
2581
2582 // Return the first block in the allocated range
2583 *actualStartBlock = block;
2584
2585 // If we could get the desired number of blocks before hitting endingBlock,
2586 // then adjust endingBlock so we won't keep looking. Ideally, the comparison
2587 // would be (block + maxBlocks) < endingBlock, but that could overflow. The
2588 // comparison below yields identical results, but without overflow.
2589 if (block < (endingBlock-maxBlocks)) {
2590 endingBlock = block + maxBlocks; // if we get this far, we've found enough
2591 }
2592
2593 /*
2594 * While loop 2:
2595 * Scan the bitmap, starting at 'currentWord' in the current
2596 * bitmap block. Continue iterating through the bitmap until
2597 * either we hit an allocated block, or until we have accumuluated
2598 * maxBlocks worth of bitmap.
2599 */
2600
2601 /* Continue until we see an allocated block */
2602 while ((currentWord & bitMask) == 0) {
2603 // Move to the next block. If no more, then exit.
2604 ++block;
2605 if (block == endingBlock) {
2606 break;
2607 }
2608
2609 // Next bit
2610 bitMask >>= 1;
2611 if (bitMask == 0) {
2612 // Next word
2613 bitMask = kHighBitInWordMask;
2614 ++buffer;
2615
2616 if (--wordsLeft == 0) {
2617 // Next block
2618 buffer = currCache = NULL;
2619
2620 /* We're only reading the bitmap here, so mark it as clean */
2621 err = ReleaseBitmapBlock(vcb, blockRef, false);
2622 if (err != noErr) {
2623 goto Exit;
2624 }
2625
2626 /*
2627 * Skip over metadata blocks.
2628 */
2629 if (!useMetaZone) {
2630 u_int32_t nextBlock;
2631 nextBlock = NextBitmapBlock(vcb, block);
2632 if (nextBlock != block) {
2633 goto Exit; /* allocation gap, so stop */
2634 }
2635 }
2636
2637 if (block >= endingBlock) {
2638 goto Exit;
2639 }
2640
2641 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
2642 if (err != noErr) {
2643 goto Exit;
2644 }
2645 buffer = currCache;
2646 wordsLeft = wordsPerBlock;
2647 }
2648 currentWord = SWAP_BE32 (*buffer);
2649 }
2650 }
2651
2652 Exit:
2653 if (currCache) {
2654 /* Release the bitmap reference prior to marking bits in-use */
2655 (void) ReleaseBitmapBlock(vcb, blockRef, false);
2656 currCache = NULL;
2657 }
2658
2659 if (err == noErr) {
2660 *actualNumBlocks = block - *actualStartBlock;
2661
2662 // sanity check
2663 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
2664 panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
2665 }
2666 }
2667 else {
2668 *actualStartBlock = 0;
2669 *actualNumBlocks = 0;
2670 }
2671
2672 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2673 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2674
2675 return err;
2676 }
2677
2678
2679 /*
2680 _______________________________________________________________________
2681
2682 Routine: BlockFindKnown
2683
2684 Function: Return a potential extent from the free extent cache. The
2685 returned extent *must* be marked allocated and removed
2686 from the cache by the *caller*.
2687
2688 Inputs:
2689 vcb Pointer to volume where space is to be allocated
2690 maxBlocks Maximum number of contiguous blocks to allocate
2691
2692 Outputs:
2693 actualStartBlock First block of range allocated, or 0 if error
2694 actualNumBlocks Number of blocks allocated, or 0 if error
2695
2696 Returns:
2697 dskFulErr Free extent cache is empty
2698 _______________________________________________________________________
2699 */
2700
2701 static OSErr BlockFindKnown(
2702 ExtendedVCB *vcb,
2703 u_int32_t maxBlocks,
2704 u_int32_t *actualStartBlock,
2705 u_int32_t *actualNumBlocks)
2706 {
2707 OSErr err;
2708 u_int32_t foundBlocks;
2709 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2710
2711 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2712 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
2713
2714 hfs_lock_mount (hfsmp);
2715 lck_spin_lock(&vcb->vcbFreeExtLock);
2716 if ( vcb->vcbFreeExtCnt == 0 ||
2717 vcb->vcbFreeExt[0].blockCount == 0) {
2718 lck_spin_unlock(&vcb->vcbFreeExtLock);
2719 hfs_unlock_mount(hfsmp);
2720 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2721 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
2722 return dskFulErr;
2723 }
2724 lck_spin_unlock(&vcb->vcbFreeExtLock);
2725 hfs_unlock_mount(hfsmp);
2726
2727 lck_spin_lock(&vcb->vcbFreeExtLock);
2728
2729 // Just grab up to maxBlocks of the first (largest) free exent.
2730 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2731 foundBlocks = vcb->vcbFreeExt[0].blockCount;
2732 if (foundBlocks > maxBlocks)
2733 foundBlocks = maxBlocks;
2734 *actualNumBlocks = foundBlocks;
2735
2736 lck_spin_unlock(&vcb->vcbFreeExtLock);
2737
2738 // sanity check
2739 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
2740 {
2741 printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2742 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2743 err = EIO;
2744 } else
2745 err = 0;
2746
2747 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2748 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2749
2750 return err;
2751 }
2752
2753 /*
2754 * BlockMarkAllocated
2755 *
2756 * This is a wrapper function around the internal calls which will actually mark the blocks
2757 * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
2758 * this logic here to avoid callers having to deal with whether or not the red-black tree
2759 * is enabled.
2760 */
2761
2762 OSErr BlockMarkAllocated(
2763 ExtendedVCB *vcb,
2764 u_int32_t startingBlock,
2765 register u_int32_t numBlocks)
2766 {
2767 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
2768 }
2769
2770
2771 /*
2772 _______________________________________________________________________
2773
2774 Routine: BlockMarkAllocatedInternal
2775
2776 Function: Mark a contiguous group of blocks as allocated (set in the
2777 bitmap). It assumes those bits are currently marked
2778 deallocated (clear in the bitmap). Note that this function
2779 must be called regardless of whether or not the bitmap or
2780 tree-based allocator is used, as all allocations must correctly
2781 be marked on-disk. If the tree-based approach is running, then
2782 this will be done before the node is removed from the tree.
2783
2784 Inputs:
2785 vcb Pointer to volume where space is to be allocated
2786 startingBlock First block number to mark as allocated
2787 numBlocks Number of blocks to mark as allocated
2788 _______________________________________________________________________
2789 */
2790 static
2791 OSErr BlockMarkAllocatedInternal (
2792 ExtendedVCB *vcb,
2793 u_int32_t startingBlock,
2794 u_int32_t numBlocks,
2795 hfs_block_alloc_flags_t flags)
2796 {
2797 OSErr err;
2798 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2799 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2800 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2801 u_int32_t firstBit; // Bit index within word of first bit to allocate
2802 u_int32_t numBits; // Number of bits in word to allocate
2803 u_int32_t *buffer = NULL;
2804 uintptr_t blockRef = 0;
2805 u_int32_t bitsPerBlock;
2806 u_int32_t wordsPerBlock;
2807 // XXXdbg
2808 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2809
2810 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2811 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0);
2812
2813 #if DEBUG
2814
2815 if (!ISSET(flags, HFS_ALLOC_COMMIT)
2816 || ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
2817 struct rl_entry *range;
2818 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
2819 hfs_assert(rl_overlap(range, startingBlock,
2820 startingBlock + numBlocks - 1) == RL_NOOVERLAP);
2821 }
2822 }
2823
2824 #endif
2825
2826 int force_flush = 0;
2827 /*
2828 * Since we are about to mark these bits as in-use
2829 * in the bitmap, decide if we need to alert the caller
2830 * that a journal flush might be appropriate. It's safe to
2831 * poke at the journal pointer here since we MUST have
2832 * called start_transaction by the time this function is invoked.
2833 * If the journal is enabled, then it will have taken the requisite
2834 * journal locks. If it is not enabled, then we have taken
2835 * a shared lock on the global lock.
2836 */
2837 if (hfsmp->jnl) {
2838 uint32_t ignore;
2839 err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
2840 if ((err == 0) && (force_flush)) {
2841 journal_request_immediate_flush (hfsmp->jnl);
2842 }
2843 }
2844
2845 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
2846
2847 /*
2848 * Don't make changes to the disk if we're just reserving. Note that
2849 * we could do better in the tentative case because we could, in theory,
2850 * avoid the journal flush above. However, that would mean that we would
2851 * need to catch the callback to stop it incorrectly addding the extent
2852 * to our free cache.
2853 */
2854 if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
2855 err = 0;
2856 goto Exit;
2857 }
2858
2859 //
2860 // Pre-read the bitmap block containing the first word of allocation
2861 //
2862
2863 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2864 HFS_ALLOC_IGNORE_RESERVED);
2865 if (err != noErr) goto Exit;
2866 //
2867 // Initialize currentWord, and wordsLeft.
2868 //
2869 {
2870 u_int32_t wordIndexInBlock;
2871
2872 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2873 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2874
2875 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2876 currentWord = buffer + wordIndexInBlock;
2877 wordsLeft = wordsPerBlock - wordIndexInBlock;
2878 }
2879
2880 // XXXdbg
2881 if (hfsmp->jnl) {
2882 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2883 }
2884
2885 //
2886 // If the first block to allocate doesn't start on a word
2887 // boundary in the bitmap, then treat that first word
2888 // specially.
2889 //
2890
2891 firstBit = startingBlock % kBitsPerWord;
2892 if (firstBit != 0) {
2893 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2894 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2895 if (numBits > numBlocks) {
2896 numBits = numBlocks; // entire allocation is inside this one word
2897 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2898 }
2899 #if DEBUG
2900 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2901 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2902 }
2903 #endif
2904 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2905 numBlocks -= numBits; // adjust number of blocks left to allocate
2906
2907 ++currentWord; // move to next word
2908 --wordsLeft; // one less word left in this block
2909 }
2910
2911 //
2912 // Allocate whole words (32 blocks) at a time.
2913 //
2914
2915 bitMask = kAllBitsSetInWord; // put this in a register for 68K
2916 while (numBlocks >= kBitsPerWord) {
2917 if (wordsLeft == 0) {
2918 // Read in the next bitmap block
2919 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2920
2921 buffer = NULL;
2922 err = ReleaseBitmapBlock(vcb, blockRef, true);
2923 if (err != noErr) goto Exit;
2924
2925 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2926 HFS_ALLOC_IGNORE_RESERVED);
2927 if (err != noErr) goto Exit;
2928
2929 // XXXdbg
2930 if (hfsmp->jnl) {
2931 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2932 }
2933
2934 // Readjust currentWord and wordsLeft
2935 currentWord = buffer;
2936 wordsLeft = wordsPerBlock;
2937 }
2938 #if DEBUG
2939 if (*currentWord != 0) {
2940 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2941 }
2942 #endif
2943 *currentWord = SWAP_BE32 (bitMask);
2944 numBlocks -= kBitsPerWord;
2945
2946 ++currentWord; // move to next word
2947 --wordsLeft; // one less word left in this block
2948 }
2949
2950 //
2951 // Allocate any remaining blocks.
2952 //
2953
2954 if (numBlocks != 0) {
2955 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2956 if (wordsLeft == 0) {
2957 // Read in the next bitmap block
2958 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2959
2960 buffer = NULL;
2961 err = ReleaseBitmapBlock(vcb, blockRef, true);
2962 if (err != noErr) goto Exit;
2963
2964 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2965 HFS_ALLOC_IGNORE_RESERVED);
2966 if (err != noErr) goto Exit;
2967
2968 // XXXdbg
2969 if (hfsmp->jnl) {
2970 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2971 }
2972
2973 currentWord = buffer;
2974 }
2975 #if DEBUG
2976 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2977 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2978 }
2979 #endif
2980 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2981
2982 // No need to update currentWord or wordsLeft
2983 }
2984
2985 Exit:
2986
2987 if (buffer)
2988 (void)ReleaseBitmapBlock(vcb, blockRef, true);
2989
2990 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2991 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2992
2993 return err;
2994 }
2995
2996
2997 /*
2998 * BlockMarkFree
2999 *
3000 * This is a wrapper function around the internal calls which will actually mark the blocks
3001 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
3002 * this logic here to avoid callers having to deal with whether or not the red-black tree
3003 * is enabled.
3004 *
3005 */
3006 OSErr BlockMarkFree(
3007 ExtendedVCB *vcb,
3008 u_int32_t startingBlock,
3009 register u_int32_t numBlocks)
3010 {
3011 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
3012 }
3013
3014
3015 /*
3016 * BlockMarkFreeUnused
3017 *
3018 * Scan the bitmap block beyond end of current file system for bits
3019 * that are marked as used. If any of the bits are marked as used,
3020 * this function marks them free.
3021 *
3022 * Note: This was specifically written to mark all bits beyond
3023 * end of current file system during hfs_extendfs(), which makes
3024 * sure that all the new blocks added to the file system are
3025 * marked as free. We expect that all the blocks beyond end of
3026 * current file system are always marked as free, but there might
3027 * be cases where are marked as used. This function assumes that
3028 * the number of blocks marked as used incorrectly are relatively
3029 * small, otherwise this can overflow journal transaction size
3030 * on certain file system configurations (example, large unused
3031 * bitmap with relatively small journal).
3032 *
3033 * Input:
3034 * startingBlock: First block of the range to mark unused
3035 * numBlocks: Number of blocks in the range to mark unused
3036 *
3037 * Returns: zero on success, non-zero on error.
3038 */
3039 OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
3040 {
3041 int error = 0;
3042 struct hfsmount *hfsmp = VCBTOHFS(vcb);
3043 u_int32_t curNumBlocks;
3044 u_int32_t bitsPerBlock;
3045 u_int32_t lastBit;
3046
3047 /* Use the optimal bitmap I/O size instead of bitmap block size */
3048 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3049
3050 /*
3051 * First clear any non bitmap allocation block aligned bits
3052 *
3053 * Calculate the first bit in the bitmap block next to
3054 * the bitmap block containing the bit for startingBlock.
3055 * Using this value, we calculate the total number of
3056 * bits to be marked unused from startingBlock to the
3057 * end of bitmap block containing startingBlock.
3058 */
3059 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
3060 curNumBlocks = lastBit - startingBlock;
3061 if (curNumBlocks > numBlocks) {
3062 curNumBlocks = numBlocks;
3063 }
3064 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
3065 if (error) {
3066 return error;
3067 }
3068 startingBlock += curNumBlocks;
3069 numBlocks -= curNumBlocks;
3070
3071 /*
3072 * Check a full bitmap block for any 'used' bit. If any bit is used,
3073 * mark all the bits only in that bitmap block as free. This ensures
3074 * that we do not write unmodified bitmap blocks and do not
3075 * overwhelm the journal.
3076 *
3077 * The code starts by checking full bitmap block at a time, and
3078 * marks entire bitmap block as free only if any bit in that bitmap
3079 * block is marked as used. In the end, it handles the last bitmap
3080 * block which might be partially full by only checking till the
3081 * caller-specified last bit and if any bit is set, only mark that
3082 * range as free.
3083 */
3084 while (numBlocks) {
3085 if (numBlocks >= bitsPerBlock) {
3086 curNumBlocks = bitsPerBlock;
3087 } else {
3088 curNumBlocks = numBlocks;
3089 }
3090 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
3091 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
3092 if (error) {
3093 return error;
3094 }
3095 }
3096 startingBlock += curNumBlocks;
3097 numBlocks -= curNumBlocks;
3098 }
3099
3100 return error;
3101 }
3102
3103 /*
3104 _______________________________________________________________________
3105
3106 Routine: BlockMarkFreeInternal
3107
3108 Function: Mark a contiguous group of blocks as free (clear in the
3109 bitmap). It assumes those bits are currently marked
3110 allocated (set in the bitmap).
3111
3112 Inputs:
3113 vcb Pointer to volume where space is to be freed
3114 startingBlock First block number to mark as freed
3115 numBlocks Number of blocks to mark as freed
3116 do_validate If true, validate that the blocks being
3117 deallocated to check if they are within totalBlocks
3118 for current volume and whether they were allocated
3119 before they are marked free.
3120 _______________________________________________________________________
3121 */
3122 static
3123 OSErr BlockMarkFreeInternal(
3124 ExtendedVCB *vcb,
3125 u_int32_t startingBlock_in,
3126 register u_int32_t numBlocks_in,
3127 Boolean do_validate)
3128 {
3129 OSErr err;
3130 u_int32_t startingBlock = startingBlock_in;
3131 u_int32_t numBlocks = numBlocks_in;
3132 uint32_t unmapStart = startingBlock_in;
3133 uint32_t unmapCount = numBlocks_in;
3134 uint32_t wordIndexInBlock;
3135 u_int32_t *currentWord; // Pointer to current word within bitmap block
3136 u_int32_t wordsLeft; // Number of words left in this bitmap block
3137 u_int32_t bitMask; // Word with given bits already set (ready to OR in)
3138 u_int32_t currentBit; // Bit index within word of current bit to allocate
3139 u_int32_t numBits; // Number of bits in word to allocate
3140 u_int32_t *buffer = NULL;
3141 uintptr_t blockRef = 0;
3142 u_int32_t bitsPerBlock;
3143 u_int32_t wordsPerBlock;
3144 // XXXdbg
3145 struct hfsmount *hfsmp = VCBTOHFS(vcb);
3146
3147 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3148 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
3149
3150 /*
3151 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
3152 * need to be able to free blocks being relocated during hfs_truncatefs.
3153 */
3154 if ((do_validate == true) &&
3155 (startingBlock + numBlocks > vcb->totalBlocks)) {
3156 #if ALLOC_DEBUG || DEBUG
3157 panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
3158 __builtin_unreachable();
3159 #else
3160 printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
3161 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
3162 err = EIO;
3163 goto Exit;
3164 #endif
3165 }
3166
3167 //
3168 // Pre-read the bitmap block containing the first word of allocation
3169 //
3170
3171 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
3172 HFS_ALLOC_IGNORE_RESERVED);
3173 if (err != noErr) goto Exit;
3174 // XXXdbg
3175 if (hfsmp->jnl) {
3176 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
3177 }
3178
3179 uint32_t min_unmap = 0, max_unmap = UINT32_MAX;
3180
3181 // Work out the bounds of any unmap we can send down
3182 struct rl_entry *range;
3183 for (int i = 0; i < 2; ++i) {
3184 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
3185 if (range->rl_start < startingBlock
3186 && range->rl_end >= min_unmap) {
3187 min_unmap = range->rl_end + 1;
3188 }
3189 if (range->rl_end >= startingBlock + numBlocks
3190 && range->rl_start < max_unmap) {
3191 max_unmap = range->rl_start;
3192 }
3193 }
3194 }
3195
3196 //
3197 // Figure out how many bits and words per bitmap block.
3198 //
3199 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
3200 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
3201 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3202
3203 //
3204 // Look for a range of free blocks immediately before startingBlock
3205 // (up to the start of the current bitmap block). Set unmapStart to
3206 // the first free block.
3207 //
3208 currentWord = buffer + wordIndexInBlock;
3209 currentBit = startingBlock % kBitsPerWord;
3210 bitMask = kHighBitInWordMask >> currentBit;
3211 while (unmapStart > min_unmap) {
3212 // Move currentWord/bitMask back by one bit
3213 bitMask <<= 1;
3214 if (bitMask == 0) {
3215 if (--currentWord < buffer)
3216 break;
3217 bitMask = kLowBitInWordMask;
3218 }
3219
3220 if (*currentWord & SWAP_BE32(bitMask))
3221 break; // Found an allocated block. Stop searching.
3222 --unmapStart;
3223 ++unmapCount;
3224 }
3225
3226 //
3227 // If the first block to free doesn't start on a word
3228 // boundary in the bitmap, then treat that first word
3229 // specially.
3230 //
3231
3232 currentWord = buffer + wordIndexInBlock;
3233 wordsLeft = wordsPerBlock - wordIndexInBlock;
3234 currentBit = startingBlock % kBitsPerWord;
3235 if (currentBit != 0) {
3236 bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit
3237 numBits = kBitsPerWord - currentBit; // number of remaining bits in this word
3238 if (numBits > numBlocks) {
3239 numBits = numBlocks; // entire allocation is inside this one word
3240 bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last
3241 }
3242 if ((do_validate == true) &&
3243 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
3244 goto Corruption;
3245 }
3246 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
3247 numBlocks -= numBits; // adjust number of blocks left to free
3248
3249 ++currentWord; // move to next word
3250 --wordsLeft; // one less word left in this block
3251 }
3252
3253 //
3254 // Free whole words (32 blocks) at a time.
3255 //
3256
3257 while (numBlocks >= kBitsPerWord) {
3258 if (wordsLeft == 0) {
3259 // Read in the next bitmap block
3260 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
3261
3262 buffer = NULL;
3263 err = ReleaseBitmapBlock(vcb, blockRef, true);
3264 if (err != noErr) goto Exit;
3265
3266 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
3267 HFS_ALLOC_IGNORE_RESERVED);
3268 if (err != noErr) goto Exit;
3269
3270 // XXXdbg
3271 if (hfsmp->jnl) {
3272 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
3273 }
3274
3275 // Readjust currentWord and wordsLeft
3276 currentWord = buffer;
3277 wordsLeft = wordsPerBlock;
3278 }
3279 if ((do_validate == true) &&
3280 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
3281 goto Corruption;
3282 }
3283 *currentWord = 0; // clear the entire word
3284 numBlocks -= kBitsPerWord;
3285
3286 ++currentWord; // move to next word
3287 --wordsLeft; // one less word left in this block
3288 }
3289
3290 //
3291 // Free any remaining blocks.
3292 //
3293
3294 if (numBlocks != 0) {
3295 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
3296 if (wordsLeft == 0) {
3297 // Read in the next bitmap block
3298 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
3299
3300 buffer = NULL;
3301 err = ReleaseBitmapBlock(vcb, blockRef, true);
3302 if (err != noErr) goto Exit;
3303
3304 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
3305 HFS_ALLOC_IGNORE_RESERVED);
3306 if (err != noErr) goto Exit;
3307
3308 // XXXdbg
3309 if (hfsmp->jnl) {
3310 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
3311 }
3312
3313 currentWord = buffer;
3314 }
3315 if ((do_validate == true) &&
3316 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
3317 goto Corruption;
3318 }
3319 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
3320
3321 // No need to update currentWord or wordsLeft
3322 }
3323
3324 //
3325 // Look for a range of free blocks immediately after the range we just freed
3326 // (up to the end of the current bitmap block).
3327 //
3328 wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
3329 wordsLeft = wordsPerBlock - wordIndexInBlock;
3330 currentWord = buffer + wordIndexInBlock;
3331 currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
3332 bitMask = kHighBitInWordMask >> currentBit;
3333 while (unmapStart + unmapCount < max_unmap) {
3334 // Move currentWord/bitMask/wordsLeft forward one bit
3335 bitMask >>= 1;
3336 if (bitMask == 0) {
3337 if (--wordsLeft == 0)
3338 break;
3339 ++currentWord;
3340 bitMask = kHighBitInWordMask;
3341 }
3342
3343 if (*currentWord & SWAP_BE32(bitMask))
3344 break; // Found an allocated block. Stop searching.
3345 ++unmapCount;
3346 }
3347
3348 Exit:
3349
3350 if (buffer)
3351 (void)ReleaseBitmapBlock(vcb, blockRef, true);
3352
3353 if (err == noErr) {
3354 hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
3355 }
3356
3357 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3358 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
3359
3360 return err;
3361
3362 Corruption:
3363 #if DEBUG
3364 panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
3365 __builtin_unreachable();
3366 #else
3367 printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
3368 vcb->vcbVN, startingBlock_in, numBlocks_in);
3369 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
3370 err = EIO;
3371 goto Exit;
3372 #endif
3373 }
3374
3375
3376 /*
3377 _______________________________________________________________________
3378
3379 Routine: BlockFindContiguous
3380
3381 Function: Find a contiguous range of blocks that are free (bits
3382 clear in the bitmap). If a contiguous range of the
3383 minimum size can't be found, an error will be returned.
3384 This is only needed to support the bitmap-scanning logic,
3385 as the red-black tree should be able to do this by internally
3386 searching its tree.
3387
3388 Inputs:
3389 vcb Pointer to volume where space is to be allocated
3390 startingBlock Preferred first block of range
3391 endingBlock Last possible block in range + 1
3392 minBlocks Minimum number of blocks needed. Must be > 0.
3393 maxBlocks Maximum (ideal) number of blocks desired
3394 useMetaZone OK to dip into metadata allocation zone
3395
3396 Outputs:
3397 actualStartBlock First block of range found, or 0 if error
3398 actualNumBlocks Number of blocks found, or 0 if error
3399
3400 Returns:
3401 noErr Found at least minBlocks contiguous
3402 dskFulErr No contiguous space found, or all less than minBlocks
3403 _______________________________________________________________________
3404 */
3405
3406 static OSErr BlockFindContiguous(
3407 ExtendedVCB *vcb,
3408 u_int32_t startingBlock,
3409 u_int32_t endingBlock,
3410 u_int32_t minBlocks,
3411 u_int32_t maxBlocks,
3412 Boolean useMetaZone,
3413 Boolean trustSummary,
3414 u_int32_t *actualStartBlock,
3415 u_int32_t *actualNumBlocks,
3416 hfs_block_alloc_flags_t flags)
3417 {
3418 OSErr err;
3419 register u_int32_t currentBlock; // Block we're currently looking at.
3420 u_int32_t firstBlock; // First free block in current extent.
3421 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
3422 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
3423 u_int32_t *buffer = NULL;
3424 register u_int32_t *currentWord;
3425 register u_int32_t bitMask;
3426 register u_int32_t wordsLeft;
3427 register u_int32_t tempWord;
3428 uintptr_t blockRef = 0;
3429 u_int32_t wordsPerBlock;
3430 u_int32_t updated_free_extent = 0;
3431 struct hfsmount *hfsmp = (struct hfsmount*) vcb;
3432 HFSPlusExtentDescriptor best = { 0, 0 };
3433
3434 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3435 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
3436
3437 /*
3438 * When we're skipping the metadata zone and the start/end
3439 * range overlaps with the metadata zone then adjust the
3440 * start to be outside of the metadata zone. If the range
3441 * is entirely inside the metadata zone then we can deny the
3442 * request (dskFulErr).
3443 */
3444 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
3445 if (startingBlock <= vcb->hfs_metazone_end) {
3446 if (endingBlock > (vcb->hfs_metazone_end + 2))
3447 startingBlock = vcb->hfs_metazone_end + 1;
3448 else
3449 goto DiskFull;
3450 }
3451 }
3452
3453 if ((endingBlock - startingBlock) < minBlocks)
3454 {
3455 // The set of blocks we're checking is smaller than the minimum number
3456 // of blocks, so we couldn't possibly find a good range.
3457 goto DiskFull;
3458 }
3459
3460 stopBlock = endingBlock - minBlocks + 1;
3461 currentBlock = startingBlock;
3462 firstBlock = 0;
3463
3464 /*
3465 * Skip over metadata blocks.
3466 */
3467 if (!useMetaZone)
3468 currentBlock = NextBitmapBlock(vcb, currentBlock);
3469
3470 /*
3471 * Use the summary table if we can. Skip over any totally
3472 * allocated blocks. currentBlock should now point to the first
3473 * block beyond the metadata zone if the metazone allocations are not
3474 * allowed in this invocation.
3475 */
3476 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3477 uint32_t suggestion;
3478 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
3479 if (err && err != ENOSPC)
3480 goto ErrorExit;
3481 if (err == ENOSPC || suggestion >= stopBlock)
3482 goto DiskFull;
3483 currentBlock = suggestion;
3484 }
3485
3486
3487 //
3488 // Pre-read the first bitmap block.
3489 //
3490 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3491 if ( err != noErr ) goto ErrorExit;
3492
3493 //
3494 // Figure out where currentBlock is within the buffer.
3495 //
3496 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
3497
3498 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
3499 currentWord = buffer + wordsLeft;
3500 wordsLeft = wordsPerBlock - wordsLeft;
3501
3502 uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
3503 - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
3504 ? 0 : hfsmp->tentativeBlocks));
3505
3506 /*
3507 * This outer do-while loop is the main body of this function. Its job is
3508 * to search through the blocks (until we hit 'stopBlock'), and iterate
3509 * through swaths of allocated bitmap until it finds free regions.
3510 */
3511
3512 do
3513 {
3514 foundBlocks = 0;
3515 /*
3516 * We will try and update the summary table as we search
3517 * below. Note that we will never update the summary table
3518 * for the first and last blocks that the summary table
3519 * covers. Ideally, we should, but the benefits probably
3520 * aren't that significant so we leave things alone for now.
3521 */
3522 uint32_t summary_block_scan = 0;
3523 /*
3524 * Inner while loop 1:
3525 * Look for free blocks, skipping over allocated ones.
3526 *
3527 * Initialization starts with checking the initial partial word
3528 * if applicable.
3529 */
3530 bitMask = currentBlock & kBitsWithinWordMask;
3531 if (bitMask)
3532 {
3533 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3534 bitMask = kHighBitInWordMask >> bitMask;
3535 while (tempWord & bitMask)
3536 {
3537 bitMask >>= 1;
3538 ++currentBlock;
3539 }
3540
3541 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
3542 if (bitMask)
3543 goto FoundUnused;
3544
3545 // Didn't find any unused bits, so we're done with this word.
3546 ++currentWord;
3547 --wordsLeft;
3548 }
3549
3550 //
3551 // Check whole words
3552 //
3553 while (currentBlock < stopBlock)
3554 {
3555 // See if it's time to read another block.
3556 if (wordsLeft == 0)
3557 {
3558 buffer = NULL;
3559 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3560 /*
3561 * If summary_block_scan is non-zero, then we must have
3562 * pulled a bitmap file block into core, and scanned through
3563 * the entire thing. Because we're in this loop, we are
3564 * implicitly trusting that the bitmap didn't have any knowledge
3565 * about this particular block. As a result, update the bitmap
3566 * (lazily, now that we've scanned it) with our findings that
3567 * this particular block is completely used up.
3568 */
3569 if (summary_block_scan != 0) {
3570 uint32_t summary_bit;
3571 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
3572 hfs_set_summary (hfsmp, summary_bit, 1);
3573 }
3574 }
3575 err = ReleaseBitmapBlock(vcb, blockRef, false);
3576 if (err != noErr) goto ErrorExit;
3577
3578 /*
3579 * Skip over metadata blocks.
3580 */
3581 if (!useMetaZone) {
3582 currentBlock = NextBitmapBlock(vcb, currentBlock);
3583 if (currentBlock >= stopBlock) {
3584 goto LoopExit;
3585 }
3586 }
3587
3588 /* Skip over fully allocated bitmap blocks if we can */
3589 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3590 uint32_t suggestion;
3591 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
3592 if (err && err != ENOSPC)
3593 goto ErrorExit;
3594 if (err == ENOSPC || suggestion >= stopBlock)
3595 goto LoopExit;
3596 currentBlock = suggestion;
3597 }
3598
3599 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3600 if ( err != noErr ) goto ErrorExit;
3601
3602 /*
3603 * Set summary_block_scan to be the block we just read into the block cache.
3604 *
3605 * At this point, we've just read an allocation block worth of bitmap file
3606 * into the buffer above, but we don't know if it is completely allocated or not.
3607 * If we find that it is completely allocated/full then we will jump
3608 * through this loop again and set the appropriate summary bit as fully allocated.
3609 */
3610 summary_block_scan = currentBlock;
3611 currentWord = buffer;
3612 wordsLeft = wordsPerBlock;
3613 }
3614
3615 // See if any of the bits are clear
3616 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
3617 {
3618 // Figure out which bit is clear
3619 bitMask = kHighBitInWordMask;
3620 while (tempWord & bitMask)
3621 {
3622 bitMask >>= 1;
3623 ++currentBlock;
3624 }
3625
3626 break; // Found the free bit; break out to FoundUnused.
3627 }
3628
3629 // Keep looking at the next word
3630 currentBlock += kBitsPerWord;
3631 ++currentWord;
3632 --wordsLeft;
3633 }
3634
3635 FoundUnused:
3636 // Make sure the unused bit is early enough to use
3637 if (currentBlock >= stopBlock)
3638 {
3639 break;
3640 }
3641
3642 // Remember the start of the extent
3643 firstBlock = currentBlock;
3644
3645
3646 /*
3647 * Inner while loop 2:
3648 * We get here if we find a free block. Count the number
3649 * of contiguous free blocks observed.
3650 *
3651 * Initialization starts with checking the initial partial word
3652 * if applicable.
3653 */
3654 bitMask = currentBlock & kBitsWithinWordMask;
3655 if (bitMask)
3656 {
3657 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3658 bitMask = kHighBitInWordMask >> bitMask;
3659 while (bitMask && !(tempWord & bitMask))
3660 {
3661 bitMask >>= 1;
3662 ++currentBlock;
3663 }
3664
3665 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3666 if (bitMask)
3667 goto FoundUsed;
3668
3669 // Didn't find any used bits, so we're done with this word.
3670 ++currentWord;
3671 --wordsLeft;
3672 }
3673
3674 //
3675 // Check whole words
3676 //
3677 while (currentBlock < endingBlock)
3678 {
3679 // See if it's time to read another block.
3680 if (wordsLeft == 0)
3681 {
3682 buffer = NULL;
3683 err = ReleaseBitmapBlock(vcb, blockRef, false);
3684 if (err != noErr) goto ErrorExit;
3685
3686 /*
3687 * Skip over metadata blocks.
3688 */
3689 if (!useMetaZone) {
3690 u_int32_t nextBlock;
3691
3692 nextBlock = NextBitmapBlock(vcb, currentBlock);
3693 if (nextBlock != currentBlock) {
3694 goto LoopExit; /* allocation gap, so stop */
3695 }
3696 }
3697
3698 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3699 if ( err != noErr ) goto ErrorExit;
3700
3701 currentWord = buffer;
3702 wordsLeft = wordsPerBlock;
3703 }
3704
3705 // See if any of the bits are set
3706 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
3707 {
3708 // Figure out which bit is set
3709 bitMask = kHighBitInWordMask;
3710 while (!(tempWord & bitMask))
3711 {
3712 bitMask >>= 1;
3713 ++currentBlock;
3714 }
3715
3716 break; // Found the used bit; break out to FoundUsed.
3717 }
3718
3719 // Keep looking at the next word
3720 currentBlock += kBitsPerWord;
3721 ++currentWord;
3722 --wordsLeft;
3723
3724 // If we found at least maxBlocks, we can quit early.
3725 if ((currentBlock - firstBlock) >= maxBlocks)
3726 break;
3727 }
3728
3729 FoundUsed:
3730 // Make sure we didn't run out of bitmap looking for a used block.
3731 // If so, pin to the end of the bitmap.
3732 if (currentBlock > endingBlock)
3733 currentBlock = endingBlock;
3734
3735 // Figure out how many contiguous free blocks there were.
3736 // Pin the answer to maxBlocks.
3737 foundBlocks = currentBlock - firstBlock;
3738 if (foundBlocks > maxBlocks)
3739 foundBlocks = maxBlocks;
3740
3741 if (remaining) {
3742 if (foundBlocks > remaining) {
3743 hfs_debug("hfs: found more blocks than are indicated free!\n");
3744 remaining = UINT32_MAX;
3745 } else
3746 remaining -= foundBlocks;
3747 }
3748
3749 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3750 if (foundBlocks > best.blockCount) {
3751 best.startBlock = firstBlock;
3752 best.blockCount = foundBlocks;
3753 }
3754
3755 if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
3756 break;
3757
3758 /*
3759 * Note that we will go ahead and add this free extent to our
3760 * cache below but that's OK because we'll remove it again if we
3761 * decide to use this extent.
3762 */
3763 } else if (foundBlocks >= minBlocks)
3764 break; // Found what we needed!
3765
3766 /*
3767 * We did not find the total blocks we were looking for, but
3768 * add this free block run to our free extent cache list, if possible.
3769 */
3770
3771 // If we're ignoring tentative ranges, we need to account for them here
3772 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
3773 struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
3774 struct rl_entry *range;;
3775 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
3776 rl_subtract(&free_extent, range);
3777 if (rl_len(range) == 0)
3778 break;
3779 }
3780 firstBlock = free_extent.rl_start;
3781 foundBlocks = rl_len(&free_extent);
3782 }
3783
3784 if (foundBlocks) {
3785 if (hfsmp->jnl == NULL) {
3786 /* If there is no journal, go ahead and add to the free ext cache. */
3787 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3788 }
3789 else {
3790 /*
3791 * If journaled, only add to the free extent cache if this block is not
3792 * waiting for a TRIM to complete; that implies that the transaction that freed it
3793 * has not yet been committed to stable storage.
3794 */
3795 int recently_deleted = 0;
3796 uint32_t nextblock;
3797 err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
3798 (uint64_t)foundBlocks, &recently_deleted, &nextblock);
3799 if ((err) || (recently_deleted == 0)) {
3800 /* if we hit an error, or the blocks not recently freed, go ahead and insert it */
3801 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3802 }
3803 }
3804 }
3805 } while (currentBlock < stopBlock);
3806 LoopExit:
3807
3808 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3809 firstBlock = best.startBlock;
3810 foundBlocks = best.blockCount;
3811 }
3812
3813 // Return the outputs.
3814 if (foundBlocks < minBlocks)
3815 {
3816 DiskFull:
3817 err = dskFulErr;
3818 ErrorExit:
3819 *actualStartBlock = 0;
3820 *actualNumBlocks = 0;
3821 }
3822 else
3823 {
3824 err = noErr;
3825 *actualStartBlock = firstBlock;
3826 *actualNumBlocks = foundBlocks;
3827 /*
3828 * Sanity check for overflow
3829 */
3830 if ((firstBlock + foundBlocks) > vcb->allocLimit) {
3831 panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
3832 vcb->vcbVN, startingBlock, endingBlock, currentBlock,
3833 firstBlock, stopBlock, minBlocks, foundBlocks);
3834 }
3835 }
3836
3837 if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
3838 int i;
3839 u_int32_t min_start = vcb->totalBlocks;
3840
3841 // set the nextAllocation pointer to the smallest free block number
3842 // we've seen so on the next mount we won't rescan unnecessarily
3843 lck_spin_lock(&vcb->vcbFreeExtLock);
3844 for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
3845 if (vcb->vcbFreeExt[i].startBlock < min_start) {
3846 min_start = vcb->vcbFreeExt[i].startBlock;
3847 }
3848 }
3849 lck_spin_unlock(&vcb->vcbFreeExtLock);
3850 if (min_start != vcb->totalBlocks) {
3851 if (min_start < vcb->nextAllocation) {
3852 vcb->nextAllocation = min_start;
3853 }
3854 if (min_start < vcb->sparseAllocation) {
3855 vcb->sparseAllocation = min_start;
3856 }
3857 }
3858 }
3859
3860 if (buffer)
3861 (void) ReleaseBitmapBlock(vcb, blockRef, false);
3862
3863 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3864 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
3865
3866 return err;
3867 }
3868
3869
3870 /*
3871 * Count number of bits set in the given 32-bit unsigned number
3872 *
3873 * Returns:
3874 * Number of bits set
3875 */
3876 static int num_bits_set(u_int32_t num)
3877 {
3878 int count;
3879
3880 for (count = 0; num; count++) {
3881 num &= num - 1;
3882 }
3883
3884 return count;
3885 }
3886
3887 /*
3888 * For a given range of blocks, find the total number of blocks
3889 * allocated. If 'stop_on_first' is true, it stops as soon as it
3890 * encounters the first allocated block. This option is useful
3891 * to determine if any block is allocated or not.
3892 *
3893 * Inputs:
3894 * startingBlock First allocation block number of the range to be scanned.
3895 * numBlocks Total number of blocks that need to be scanned.
3896 * stop_on_first Stop the search after the first allocated block is found.
3897 *
3898 * Output:
3899 * allocCount Total number of allocation blocks allocated in the given range.
3900 *
3901 * On error, it is the number of allocated blocks found
3902 * before the function got an error.
3903 *
3904 * If 'stop_on_first' is set,
3905 * allocCount = 1 if any allocated block was found.
3906 * allocCount = 0 if no allocated block was found.
3907 *
3908 * Returns:
3909 * 0 on success, non-zero on failure.
3910 */
3911 static int
3912 hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
3913 u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
3914 {
3915 u_int32_t *currentWord; // Pointer to current word within bitmap block
3916 u_int32_t wordsLeft; // Number of words left in this bitmap block
3917 u_int32_t bitMask; // Word with given bits already set (ready to test)
3918 u_int32_t firstBit; // Bit index within word of first bit to allocate
3919 u_int32_t numBits; // Number of bits in word to allocate
3920 u_int32_t *buffer = NULL;
3921 uintptr_t blockRef;
3922 u_int32_t bitsPerBlock;
3923 u_int32_t wordsPerBlock;
3924 u_int32_t blockCount = 0;
3925 int error;
3926
3927 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3928 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
3929
3930 /*
3931 * Pre-read the bitmap block containing the first word of allocation
3932 */
3933 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
3934 HFS_ALLOC_IGNORE_TENTATIVE);
3935 if (error)
3936 goto JustReturn;
3937
3938 /*
3939 * Initialize currentWord, and wordsLeft.
3940 */
3941 {
3942 u_int32_t wordIndexInBlock;
3943
3944 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3945 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
3946
3947 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3948 currentWord = buffer + wordIndexInBlock;
3949 wordsLeft = wordsPerBlock - wordIndexInBlock;
3950 }
3951
3952 /*
3953 * First test any non word aligned bits.
3954 */
3955 firstBit = startingBlock % kBitsPerWord;
3956 if (firstBit != 0) {
3957 bitMask = kAllBitsSetInWord >> firstBit;
3958 numBits = kBitsPerWord - firstBit;
3959 if (numBits > numBlocks) {
3960 numBits = numBlocks;
3961 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
3962 }
3963 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3964 if (stop_on_first) {
3965 blockCount = 1;
3966 goto Exit;
3967 }
3968 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3969 }
3970 numBlocks -= numBits;
3971 ++currentWord;
3972 --wordsLeft;
3973 }
3974
3975 /*
3976 * Test whole words (32 blocks) at a time.
3977 */
3978 while (numBlocks >= kBitsPerWord) {
3979 if (wordsLeft == 0) {
3980 /* Read in the next bitmap block. */
3981 startingBlock += bitsPerBlock;
3982
3983 buffer = NULL;
3984 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3985 if (error) goto Exit;
3986
3987 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
3988 HFS_ALLOC_IGNORE_TENTATIVE);
3989 if (error) goto Exit;
3990
3991 /* Readjust currentWord and wordsLeft. */
3992 currentWord = buffer;
3993 wordsLeft = wordsPerBlock;
3994 }
3995 if (*currentWord != 0) {
3996 if (stop_on_first) {
3997 blockCount = 1;
3998 goto Exit;
3999 }
4000 blockCount += num_bits_set(*currentWord);
4001 }
4002 numBlocks -= kBitsPerWord;
4003 ++currentWord;
4004 --wordsLeft;
4005 }
4006
4007 /*
4008 * Test any remaining blocks.
4009 */
4010 if (numBlocks != 0) {
4011 bitMask = ~(kAllBitsSetInWord >> numBlocks);
4012 if (wordsLeft == 0) {
4013 /* Read in the next bitmap block */
4014 startingBlock += bitsPerBlock;
4015
4016 buffer = NULL;
4017 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
4018 if (error) goto Exit;
4019
4020 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
4021 HFS_ALLOC_IGNORE_TENTATIVE);
4022 if (error) goto Exit;
4023
4024 currentWord = buffer;
4025 }
4026 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
4027 if (stop_on_first) {
4028 blockCount = 1;
4029 goto Exit;
4030 }
4031 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
4032 }
4033 }
4034 Exit:
4035 if (buffer) {
4036 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
4037 }
4038 if (allocCount) {
4039 *allocCount = blockCount;
4040 }
4041
4042 JustReturn:
4043 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
4044 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
4045
4046 return (error);
4047 }
4048
4049 /*
4050 * Count total number of blocks that are allocated in the given
4051 * range from the bitmap. This is used to preflight total blocks
4052 * that need to be relocated during volume resize.
4053 *
4054 * The journal or allocation file lock must be held.
4055 *
4056 * Returns:
4057 * 0 on success, non-zero on failure.
4058 * On failure, allocCount is zero.
4059 */
4060 int
4061 hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
4062 u_int32_t numBlocks, u_int32_t *allocCount)
4063 {
4064 return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
4065 }
4066
4067 /*
4068 * Test to see if any blocks in a range are allocated.
4069 *
4070 * Note: On error, this function returns 1, which means that
4071 * one or more blocks in the range are allocated. This function
4072 * is primarily used for volume resize and we do not want
4073 * to report to the caller that the blocks are free when we
4074 * were not able to deterministically find it out. So on error,
4075 * we always report that the blocks are allocated.
4076 *
4077 * The journal or allocation file lock must be held.
4078 *
4079 * Returns
4080 * 0 if all blocks in the range are free.
4081 * 1 if blocks in the range are allocated, or there was an error.
4082 */
4083 int
4084 hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
4085 {
4086 int error;
4087 u_int32_t allocCount;
4088
4089 error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
4090 if (error) {
4091 /* On error, we always say that the blocks are allocated
4092 * so that volume resize does not return false success.
4093 */
4094 return 1;
4095 } else {
4096 /* The function was deterministically able to find out
4097 * if there was any block allocated or not. In that case,
4098 * the value in allocCount is good enough to be returned
4099 * back to the caller.
4100 */
4101 return allocCount;
4102 }
4103 }
4104
4105 /*
4106 * CONFIG_HFS_RBTREE
4107 * Check to see if the red-black tree is live. Allocation file lock must be held
4108 * shared or exclusive to call this function. Note that we may call this even if
4109 * HFS is built without activating the red-black tree code.
4110 */
4111 int
4112 hfs_isrbtree_active(struct hfsmount *hfsmp){
4113
4114 #pragma unused (hfsmp)
4115
4116 /* Just return 0 for now */
4117 return 0;
4118 }
4119
4120
4121
4122 /* Summary Table Functions */
4123 /*
4124 * hfs_check_summary:
4125 *
4126 * This function should be used to query the summary table to see if we can
4127 * bypass a bitmap block or not when we're trying to find a free allocation block.
4128 *
4129 *
4130 * Inputs:
4131 * allocblock - allocation block number. Will be used to infer the correct summary bit.
4132 * hfsmp -- filesystem in question.
4133 *
4134 * Output Arg:
4135 * *freeblocks - set to 1 if we believe at least one free blocks in this vcbVBMIOSize
4136 * page of bitmap file.
4137 *
4138 *
4139 * Returns:
4140 * 0 on success
4141 * EINVAL on error
4142 *
4143 */
4144
4145 static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
4146
4147 int err = EINVAL;
4148 if (hfsmp->vcbVBMIOSize) {
4149 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4150 uint32_t index;
4151 if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
4152 *freeblocks = 0;
4153 return EINVAL;
4154 }
4155
4156 /* Ok, now that we have the bit index into the array, what byte is it in ? */
4157 uint32_t byteindex = index / kBitsPerByte;
4158 uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
4159 uint8_t bit_in_byte = index % kBitsPerByte;
4160
4161 if (current_byte & (1 << bit_in_byte)) {
4162 /*
4163 * We do not believe there is anything free in the
4164 * entire vcbVBMIOSize'd block.
4165 */
4166 *freeblocks = 0;
4167 }
4168 else {
4169 /* Looks like there might be a free block here... */
4170 *freeblocks = 1;
4171 }
4172 }
4173 err = 0;
4174 }
4175
4176 return err;
4177 }
4178
4179
4180 #if 0
4181 /*
4182 * hfs_get_next_summary
4183 *
4184 * From a given allocation block, jump to the allocation block at the start of the
4185 * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over
4186 * large swaths of bitmap once we have determined that the bitmap is relatively full.
4187 *
4188 * Inputs: hfsmount, starting allocation block number
4189 * Output Arg: *newblock will contain the allocation block number to start
4190 * querying.
4191 *
4192 * Returns:
4193 * 0 on success
4194 * EINVAL if the block argument is too large to be used, or the summary table not live.
4195 * EFBIG if there are no more summary bits to be queried
4196 */
4197 static int
4198 hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
4199
4200 u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4201 u_int32_t start_offset;
4202 u_int32_t next_offset;
4203 int err = EINVAL;
4204
4205 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4206 if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
4207 return err;
4208 }
4209
4210 next_offset = start_offset++;
4211
4212 if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
4213 /* Can't jump to the next summary bit. */
4214 return EINVAL;
4215 }
4216
4217 /* Otherwise, compute and return */
4218 *newblock = next_offset * bits_per_iosize;
4219 if (*newblock >= hfsmp->totalBlocks) {
4220 return EINVAL;
4221 }
4222 err = 0;
4223 }
4224
4225 return err;
4226 }
4227
4228 #endif
4229
4230 /*
4231 * hfs_release_summary
4232 *
4233 * Given an extent that is about to be de-allocated on-disk, determine the number
4234 * of summary bitmap bits that need to be marked as 'potentially available'.
4235 * Then go ahead and mark them as free.
4236 *
4237 * Inputs:
4238 * hfsmp - hfs mount
4239 * block - starting allocation block.
4240 * length - length of the extent.
4241 *
4242 * Returns:
4243 * EINVAL upon any errors.
4244 */
4245 static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
4246 int err = EINVAL;
4247 uint32_t end_blk = (start_blk + length) - 1;
4248
4249 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4250 /* Figure out what the starting / ending block's summary bits are */
4251 uint32_t start_bit;
4252 uint32_t end_bit;
4253 uint32_t current_bit;
4254
4255 err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
4256 if (err) {
4257 goto release_err;
4258 }
4259 err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
4260 if (err) {
4261 goto release_err;
4262 }
4263
4264 if (ALLOC_DEBUG) {
4265 if (start_bit > end_bit) {
4266 panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
4267 }
4268 }
4269 current_bit = start_bit;
4270 while (current_bit <= end_bit) {
4271 err = hfs_set_summary (hfsmp, current_bit, 0);
4272 current_bit++;
4273 }
4274 }
4275
4276 release_err:
4277 return err;
4278 }
4279
4280 /*
4281 * hfs_find_summary_free
4282 *
4283 * Given a allocation block as input, returns an allocation block number as output as a
4284 * suggestion for where to start scanning the bitmap in order to find free blocks. It will
4285 * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
4286 * bit, then keep iterating over the summary bits in order to find the first free one.
4287 *
4288 * Inputs:
4289 * hfsmp - hfs mount
4290 * block - starting allocation block
4291 * newblock - output block as suggestion
4292 *
4293 * Returns:
4294 * 0 on success
4295 * ENOSPC if we could not find a free block
4296 */
4297
4298 int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
4299
4300 int err = ENOSPC;
4301 uint32_t bit_index = 0;
4302 uint32_t maybe_has_blocks = 0;
4303
4304 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4305 uint32_t byte_index;
4306 uint8_t curbyte;
4307 uint8_t bit_in_byte;
4308 uint32_t summary_cap;
4309
4310 /*
4311 * We generate a cap for the summary search because the summary table
4312 * always represents a full summary of the bitmap FILE, which may
4313 * be way more bits than are necessary for the actual filesystem
4314 * whose allocations are mapped by the bitmap.
4315 *
4316 * Compute how much of hfs_summary_size is useable for the given number
4317 * of allocation blocks eligible on this FS.
4318 */
4319 err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
4320 if (err) {
4321 goto summary_exit;
4322 }
4323
4324 /* Check the starting block first */
4325 err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
4326 if (err) {
4327 goto summary_exit;
4328 }
4329
4330 if (maybe_has_blocks) {
4331 /*
4332 * It looks like the initial start block could have something.
4333 * Short-circuit and just use that.
4334 */
4335 *newblock = block;
4336 goto summary_exit;
4337 }
4338
4339 /*
4340 * OK, now we know that the first block was useless.
4341 * Get the starting summary bit, and find it in the array
4342 */
4343 maybe_has_blocks = 0;
4344 err = hfs_get_summary_index (hfsmp, block, &bit_index);
4345 if (err) {
4346 goto summary_exit;
4347 }
4348
4349 /* Iterate until we find something. */
4350 while (bit_index <= summary_cap) {
4351 byte_index = bit_index / kBitsPerByte;
4352 curbyte = hfsmp->hfs_summary_table[byte_index];
4353 bit_in_byte = bit_index % kBitsPerByte;
4354
4355 if (curbyte & (1 << bit_in_byte)) {
4356 /* nothing here. increment and move on */
4357 bit_index++;
4358 }
4359 else {
4360 /*
4361 * found something! convert bit_index back into
4362 * an allocation block for use. 'newblock' will now
4363 * contain the proper allocation block # based on the bit
4364 * index.
4365 */
4366 err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
4367 if (err) {
4368 goto summary_exit;
4369 }
4370 maybe_has_blocks = 1;
4371 break;
4372 }
4373 }
4374
4375 /* If our loop didn't find anything, set err to ENOSPC */
4376 if (maybe_has_blocks == 0) {
4377 err = ENOSPC;
4378 }
4379 }
4380
4381 /* If the summary table is not active for this mount, we'll just return ENOSPC */
4382 summary_exit:
4383 if (maybe_has_blocks) {
4384 err = 0;
4385 }
4386
4387 return err;
4388 }
4389
4390 /*
4391 * hfs_get_summary_allocblock
4392 *
4393 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
4394 *
4395 * Inputs:
4396 * hfsmp - hfs mount
4397 * summarybit - summmary bit index
4398 * *alloc - allocation block number in the bitmap file.
4399 *
4400 * Output:
4401 * 0 on success
4402 * EINVAL on failure
4403 */
4404 int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
4405 summarybit, uint32_t *alloc) {
4406 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4407 uint32_t allocblk;
4408
4409 allocblk = summarybit * bits_per_iosize;
4410
4411 if (allocblk >= hfsmp->totalBlocks) {
4412 return EINVAL;
4413 }
4414 else {
4415 *alloc = allocblk;
4416 }
4417
4418 return 0;
4419 }
4420
4421
4422 /*
4423 * hfs_set_summary:
4424 *
4425 * This function should be used to manipulate the summary table
4426 *
4427 * The argument 'inuse' will set the value of the bit in question to one or zero
4428 * depending on its value.
4429 *
4430 * Inputs:
4431 * hfsmp - hfs mount
4432 * summarybit - the bit index into the summary table to set/unset.
4433 * inuse - the value to assign to the bit.
4434 *
4435 * Returns:
4436 * 0 on success
4437 * EINVAL on error
4438 *
4439 */
4440
4441 static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
4442
4443 int err = EINVAL;
4444 if (hfsmp->vcbVBMIOSize) {
4445 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4446
4447 if (ALLOC_DEBUG) {
4448 if (hfsmp->hfs_summary_table == NULL) {
4449 panic ("hfs_set_summary: no table for %p ", hfsmp);
4450 }
4451 }
4452
4453 /* Ok, now that we have the bit index into the array, what byte is it in ? */
4454 uint32_t byte_index = summarybit / kBitsPerByte;
4455 uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
4456 uint8_t bit_in_byte = summarybit % kBitsPerByte;
4457
4458 if (inuse) {
4459 current_byte = (current_byte | (1 << bit_in_byte));
4460 }
4461 else {
4462 current_byte = (current_byte & ~(1 << bit_in_byte));
4463 }
4464
4465 hfsmp->hfs_summary_table[byte_index] = current_byte;
4466 }
4467 err = 0;
4468 }
4469
4470 return err;
4471 }
4472
4473
4474 /*
4475 * hfs_get_summary_index:
4476 *
4477 * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
4478 * of IO against the bitmap file.
4479 *
4480 * Returns:
4481 * 0 on success
4482 * EINVAL on failure
4483 */
4484 static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
4485 uint32_t summary_bit;
4486 uint32_t bits_per_iosize;
4487 int err = EINVAL;
4488
4489 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4490 /* Is the input block bigger than the total number of blocks? */
4491 if (block >= hfsmp->totalBlocks) {
4492 return EINVAL;
4493 }
4494
4495 /* Is there even a vbmIOSize set? */
4496 if (hfsmp->vcbVBMIOSize == 0) {
4497 return EINVAL;
4498 }
4499
4500 bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4501
4502 summary_bit = block / bits_per_iosize;
4503
4504 *index = summary_bit;
4505 err = 0;
4506 }
4507
4508 return err;
4509 }
4510
4511 /*
4512 * hfs_init_summary
4513 *
4514 * From a given mount structure, compute how big the summary table should be for the given
4515 * filesystem, then allocate and bzero the memory.
4516 *
4517 * Returns:
4518 * 0 on success
4519 * EINVAL on failure
4520 */
4521 int
4522 hfs_init_summary (struct hfsmount *hfsmp) {
4523
4524 uint32_t summary_size;
4525 uint32_t summary_size_bytes;
4526 uint8_t *summary_table;
4527
4528 if (hfsmp->hfs_allocation_cp == NULL) {
4529 if (ALLOC_DEBUG) {
4530 printf("hfs: summary table cannot progress without a bitmap cnode! \n");
4531 }
4532 return EINVAL;
4533 }
4534 /*
4535 * The practical maximum size of the summary table is 16KB:
4536 *
4537 * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
4538 *
4539 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
4540 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
4541 * supporting this much worse. The math would instead look like this:
4542 * (512MB / 2k) / 8 == 32k.
4543 *
4544 * So, we will disallow the summary table if the allocation block size is < 4k.
4545 */
4546
4547 if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
4548 printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
4549 return EINVAL;
4550 }
4551
4552 summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4553
4554 if (ALLOC_DEBUG) {
4555 printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
4556 hfsmp->hfs_allocation_cp->c_blocks);
4557 }
4558
4559 /*
4560 * If the bitmap IO size is not the same as the allocation block size then
4561 * then re-compute the number of summary bits necessary. Note that above, the
4562 * the default size is the number of allocation blocks in the bitmap *FILE*
4563 * (not the number of bits in the bitmap itself). If the allocation block size
4564 * is large enough though, we may need to increase this.
4565 */
4566 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4567 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4568 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4569
4570 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4571 summary_size = (uint32_t) lrg_size;
4572 }
4573
4574 /*
4575 * If the block size is the same as the IO Size, then the total number of blocks
4576 * is already equal to the number of IO units, which is our number of summary bits.
4577 */
4578
4579 summary_size_bytes = summary_size / kBitsPerByte;
4580 /* Always add one byte, just in case we have a dangling number of bits */
4581 summary_size_bytes++;
4582
4583 if (ALLOC_DEBUG) {
4584 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
4585 printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
4586 }
4587
4588 /* Store the field in the mount point */
4589 hfsmp->hfs_summary_size = summary_size;
4590 hfsmp->hfs_summary_bytes = summary_size_bytes;
4591
4592 summary_table = hfs_mallocz(summary_size_bytes);
4593
4594 /* enable the summary table */
4595 hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
4596 hfsmp->hfs_summary_table = summary_table;
4597
4598 if (ALLOC_DEBUG) {
4599 if (hfsmp->hfs_summary_table == NULL) {
4600 panic ("HFS Summary Init: no table for %p\n", hfsmp);
4601 }
4602 }
4603 return 0;
4604 }
4605
4606 /*
4607 * hfs_rebuild_summary
4608 *
4609 * This function should be used to allocate a new hunk of memory for use as a summary
4610 * table, then copy the existing data into it. We use it whenever the filesystem's size
4611 * changes. When a resize is in progress, you can still use the extant summary
4612 * table if it is active.
4613 *
4614 * Inputs:
4615 * hfsmp -- FS in question
4616 * newlength -- new length of the FS in allocation blocks.
4617 *
4618 * Outputs:
4619 * 0 on success, EINVAL on failure. If this function fails, the summary table
4620 * will be disabled for future use.
4621 *
4622 */
4623 static int hfs_rebuild_summary (struct hfsmount *hfsmp) {
4624
4625 uint32_t new_summary_size;
4626
4627 new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4628
4629
4630 if (ALLOC_DEBUG) {
4631 printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
4632 }
4633
4634 /*
4635 * If the bitmap IO size is not the same as the allocation block size, then re-compute
4636 * the number of summary bits necessary. Note that above, the default size is the number
4637 * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
4638 * If the allocation block size is large enough though, we may need to increase this, as
4639 * bitmap IO is capped at 64k per IO
4640 */
4641 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4642 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4643 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4644
4645 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4646 new_summary_size = (uint32_t) lrg_size;
4647 }
4648
4649 /*
4650 * Ok, we have the new summary bitmap theoretical max size. See if it's the same as
4651 * what we've got already...
4652 */
4653 if (new_summary_size != hfsmp->hfs_summary_size) {
4654 uint32_t summarybytes = new_summary_size / kBitsPerByte;
4655 uint32_t copysize;
4656 uint8_t *newtable;
4657 /* Add one byte for slop */
4658 summarybytes++;
4659
4660 if (ALLOC_DEBUG) {
4661 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
4662 printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
4663 }
4664
4665 newtable = hfs_mallocz(summarybytes);
4666
4667 /*
4668 * The new table may be smaller than the old one. If this is true, then
4669 * we can't copy the full size of the existing summary table into the new
4670 * one.
4671 *
4672 * The converse is not an issue since we bzeroed the table above.
4673 */
4674 copysize = hfsmp->hfs_summary_bytes;
4675 if (summarybytes < hfsmp->hfs_summary_bytes) {
4676 copysize = summarybytes;
4677 }
4678 memcpy (newtable, hfsmp->hfs_summary_table, copysize);
4679
4680 /* We're all good. Destroy the old copy and update ptrs */
4681 hfs_free(hfsmp->hfs_summary_table, hfsmp->hfs_summary_bytes);
4682
4683 hfsmp->hfs_summary_table = newtable;
4684 hfsmp->hfs_summary_size = new_summary_size;
4685 hfsmp->hfs_summary_bytes = summarybytes;
4686 }
4687
4688 return 0;
4689 }
4690
4691
4692 #if ALLOC_DEBUG
4693 /*
4694 * hfs_validate_summary
4695 *
4696 * Validation routine for the summary table. Debug-only function.
4697 *
4698 * Bitmap lock must be held.
4699 *
4700 */
4701 void hfs_validate_summary (struct hfsmount *hfsmp) {
4702 uint32_t i;
4703 int err;
4704
4705 /*
4706 * Iterate over all of the bits in the summary table, and verify if
4707 * there really are free blocks in the pages that we believe may
4708 * may contain free blocks.
4709 */
4710
4711 if (hfsmp->hfs_summary_table == NULL) {
4712 panic ("HFS Summary: No HFS summary table!");
4713 }
4714
4715 /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. we add 1 byte for slop */
4716 if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
4717 panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
4718 }
4719
4720 if (hfsmp->vcbVBMIOSize == 0) {
4721 panic("HFS Summary: no VCB VBM IO Size !");
4722 }
4723
4724 printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
4725 printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);
4726
4727
4728 /* iterate through all possible summary bits */
4729 for (i = 0; i < hfsmp->hfs_summary_size ; i++) {
4730
4731 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4732 uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;
4733
4734 /* Compute the corresponding allocation block for the summary bit. */
4735 uint32_t alloc_block = i * bits_per_iosize;
4736
4737 /*
4738 * We use a uint32_t pointer here because it will speed up
4739 * access to the real bitmap data on disk.
4740 */
4741 uint32_t *block_data;
4742 struct buf *bp;
4743 int counter;
4744 int counter_max;
4745 int saw_free_bits = 0;
4746
4747 /* Get the block */
4748 if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) {
4749 panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
4750 }
4751
4752 /* Query the status of the bit and then make sure we match */
4753 uint32_t maybe_has_free_blocks;
4754 err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
4755 if (err) {
4756 panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
4757 }
4758 counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;
4759
4760 for (counter = 0; counter < counter_max; counter++) {
4761 uint32_t word = block_data[counter];
4762
4763 /* We assume that we'll not find any free bits here. */
4764 if (word != kAllBitsSetInWord) {
4765 if (maybe_has_free_blocks) {
4766 /* All done */
4767 saw_free_bits = 1;
4768 break;
4769 }
4770 else {
4771 panic ("HFS Summary: hfs_check_summary saw free bits!");
4772 }
4773 }
4774 }
4775
4776 if (maybe_has_free_blocks && (saw_free_bits == 0)) {
4777 panic ("HFS Summary: did not see free bits !");
4778 }
4779
4780 /* Release the block. */
4781 if ((err = ReleaseScanBitmapRange (bp))) {
4782 panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
4783 }
4784 }
4785
4786 printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);
4787
4788 return;
4789 }
4790 #endif
4791
4792 /*
4793 * hfs_alloc_scan_range:
4794 *
4795 * This function should be used to scan large ranges of the allocation bitmap
4796 * at one time. It makes two key assumptions:
4797 *
4798 * 1) Bitmap lock is held during the duration of the call (exclusive)
4799 * 2) There are no pages in the buffer cache for any of the bitmap
4800 * blocks that we may encounter. It *MUST* be completely empty.
4801 *
4802 * The expected use case is when we are scanning the bitmap in full while we are
4803 * still mounting the filesystem in order to issue TRIMs or build up the summary
4804 * table for the mount point. It should be done after any potential journal replays
4805 * are completed and their I/Os fully issued.
4806 *
4807 * The key reason for assumption (2) above is that this function will try to issue
4808 * I/O against the bitmap file in chunks as large a possible -- essentially as
4809 * much as the buffer layer will handle (1MB). Because the size of these I/Os
4810 * is larger than what would be expected during normal runtime we must invalidate
4811 * the buffers as soon as we are done with them so that they do not persist in
4812 * the buffer cache for other threads to find, as they'll typically be doing
4813 * allocation-block size I/Os instead.
4814 *
4815 * Input Args:
4816 * hfsmp - hfs mount data structure
4817 * startbit - allocation block # to start our scan. It must be aligned
4818 * on a vcbVBMIOsize boundary.
4819 * list - journal trim list data structure for issuing TRIMs
4820 *
4821 * Output Args:
4822 * bitToScan - Return the next bit to scan if this function is called again.
4823 * Caller will supply this into the next invocation
4824 * of this call as 'startbit'.
4825 */
4826
4827 static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4828 u_int32_t *bitToScan, struct jnl_trim_list *list) {
4829
4830 int error;
4831 int readwrite = 1;
4832 u_int32_t curAllocBlock;
4833 struct buf *blockRef = NULL;
4834 u_int32_t *buffer = NULL;
4835 u_int32_t free_offset = 0; //tracks the start of the current free range
4836 u_int32_t size = 0; // tracks the length of the current free range.
4837 u_int32_t iosize = 0; //how much io we should generate against the bitmap
4838 u_int32_t byte_off; // byte offset into the bitmap file.
4839 u_int32_t completed_size; // how much io was actually completed
4840 u_int32_t last_bitmap_block;
4841 u_int32_t current_word;
4842 u_int32_t word_index = 0;
4843
4844 /* summary table building */
4845 uint32_t summary_bit = 0;
4846 uint32_t saw_free_blocks = 0;
4847 uint32_t last_marked = 0;
4848
4849 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4850 readwrite = 0;
4851 }
4852
4853 /*
4854 * Compute how much I/O we should generate here.
4855 * hfs_scan_range_size will validate that the start bit
4856 * converted into a byte offset into the bitmap file,
4857 * is aligned on a VBMIOSize boundary.
4858 */
4859 error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4860 if (error) {
4861 if (ALLOC_DEBUG) {
4862 panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4863 }
4864 return error;
4865 }
4866
4867 if (iosize < hfsmp->vcbVBMIOSize) {
4868 if (ALLOC_DEBUG) {
4869 panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4870 }
4871 return EINVAL;
4872 }
4873
4874 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
4875 byte_off = startbit / kBitsPerByte;
4876
4877 /*
4878 * When the journal replays blocks, it does so by writing directly to the disk
4879 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
4880 * it also immediately re-reads and invalidates the range covered by the bp so
4881 * it does not leave anything lingering in the cache (for iosize reasons).
4882 *
4883 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4884 *
4885 * NOTE: It is not recommended, but it is possible to call the function below
4886 * on sections of the bitmap that may be in core already as long as the pages are not
4887 * dirty. In that case, we'd notice that something starting at that
4888 * logical block of the bitmap exists in the metadata cache, and we'd check
4889 * if the iosize requested is the same as what was already allocated for it.
4890 * Odds are pretty good we're going to request something larger. In that case,
4891 * we just free the existing memory associated with the buf and reallocate a
4892 * larger range. This function should immediately invalidate it as soon as we're
4893 * done scanning, so this shouldn't cause any coherency issues.
4894 */
4895
4896 error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4897 if (error) {
4898 if (ALLOC_DEBUG) {
4899 panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4900 }
4901 return error;
4902 }
4903
4904 /*
4905 * At this point, we have a giant wired buffer that represents some portion of
4906 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
4907 * bytes though, so clip our ending bit to what we actually read in.
4908 */
4909 completed_size = buf_count(blockRef);
4910 last_bitmap_block = completed_size * kBitsPerByte;
4911 last_bitmap_block = last_bitmap_block + startbit;
4912
4913 /* Cap the last block to the total number of blocks if required */
4914 if (last_bitmap_block > hfsmp->totalBlocks) {
4915 last_bitmap_block = hfsmp->totalBlocks;
4916 }
4917
4918 /* curAllocBlock represents the logical block we're analyzing. */
4919 curAllocBlock = startbit;
4920 word_index = 0;
4921 size = 0;
4922
4923 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4924 if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4925 error = EINVAL;
4926 if (ALLOC_DEBUG) {
4927 panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4928 }
4929 return error;
4930 }
4931 /*
4932 * summary_bit should now be set to the summary bit corresponding to
4933 * the allocation block of the first bit that we're supposed to scan
4934 */
4935 }
4936 saw_free_blocks = 0;
4937
4938 while (curAllocBlock < last_bitmap_block) {
4939 u_int32_t bit;
4940
4941 /* Update the summary table as needed */
4942 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4943 if (ALLOC_DEBUG) {
4944 if (hfsmp->hfs_summary_table == NULL) {
4945 panic ("hfs_alloc_scan_range: no summary table!");
4946 }
4947 }
4948
4949 uint32_t temp_summary;
4950 error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4951 if (error) {
4952 if (ALLOC_DEBUG) {
4953 panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4954 }
4955 return EINVAL;
4956 }
4957
4958 if (ALLOC_DEBUG) {
4959 if (temp_summary < summary_bit) {
4960 panic ("hfs_alloc_scan_range: backwards summary bit?\n");
4961 }
4962 }
4963
4964 /*
4965 * If temp_summary is greater than summary_bit, then this
4966 * means that the next allocation block crosses a vcbVBMIOSize boundary
4967 * and we should treat this range of on-disk data as part of a new summary
4968 * bit.
4969 */
4970 if (temp_summary > summary_bit) {
4971 if (saw_free_blocks == 0) {
4972 /* Mark the bit as totally consumed in the summary table */
4973 hfs_set_summary (hfsmp, summary_bit, 1);
4974 }
4975 else {
4976 /* Mark the bit as potentially free in summary table */
4977 hfs_set_summary (hfsmp, summary_bit, 0);
4978 }
4979 last_marked = summary_bit;
4980 /*
4981 * Any time we set the summary table, update our counter which tracks
4982 * what the last bit that was fully marked in the summary table.
4983 *
4984 * Then reset our marker which says we haven't seen a free bit yet.
4985 */
4986 saw_free_blocks = 0;
4987 summary_bit = temp_summary;
4988 }
4989 } /* End summary table conditions */
4990
4991 current_word = SWAP_BE32(buffer[word_index]);
4992 /* Iterate through the word 1 bit at a time... */
4993 for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
4994 if (curAllocBlock >= last_bitmap_block) {
4995 break;
4996 }
4997 u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
4998
4999 if (allocated) {
5000 if (size != 0) {
5001 if (readwrite) {
5002 /* Insert the previously tracked range of free blocks to the trim list */
5003 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
5004 }
5005 add_free_extent_cache (hfsmp, free_offset, size);
5006 size = 0;
5007 free_offset = 0;
5008 }
5009 }
5010 else {
5011 /* Not allocated */
5012 size++;
5013 if (free_offset == 0) {
5014 /* Start a new run of free spcae at curAllocBlock */
5015 free_offset = curAllocBlock;
5016 }
5017 if (saw_free_blocks == 0) {
5018 saw_free_blocks = 1;
5019 }
5020 }
5021 } /* end for loop iterating through the word */
5022
5023 if (curAllocBlock < last_bitmap_block) {
5024 word_index++;
5025 }
5026
5027 } /* End while loop (iterates through last_bitmap_block) */
5028
5029
5030 /*
5031 * We've (potentially) completed our pass through this region of bitmap,
5032 * but one thing we may not have done is updated that last summary bit for
5033 * the last page we scanned, because we would have never transitioned across
5034 * a vcbVBMIOSize boundary again. Check for that and update the last bit
5035 * as needed.
5036 *
5037 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
5038 * for the region of bitmap on-disk that we were scanning. (it is one greater).
5039 */
5040 if ((curAllocBlock >= last_bitmap_block) &&
5041 (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
5042 uint32_t temp_summary;
5043 /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
5044 uint32_t temp_block = last_bitmap_block - 1;
5045 error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
5046 if (error) {
5047 if (ALLOC_DEBUG) {
5048 panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
5049 }
5050 return EINVAL;
5051 }
5052
5053 /* Did we already update this in the table? */
5054 if (temp_summary > last_marked) {
5055 if (saw_free_blocks == 0) {
5056 hfs_set_summary (hfsmp, temp_summary, 1);
5057 }
5058 else {
5059 hfs_set_summary (hfsmp, temp_summary, 0);
5060 }
5061 }
5062 }
5063
5064 /*
5065 * We may have been tracking a range of free blocks that hasn't been inserted yet.
5066 * Keep the logic for the TRIM and free extent separate from that of the summary
5067 * table management even though they are closely linked.
5068 */
5069 if (size != 0) {
5070 if (readwrite) {
5071 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
5072 }
5073 add_free_extent_cache (hfsmp, free_offset, size);
5074 }
5075
5076 /*
5077 * curAllocBlock represents the next block we need to scan when we return
5078 * to this function.
5079 */
5080 *bitToScan = curAllocBlock;
5081 ReleaseScanBitmapRange(blockRef);
5082
5083 return 0;
5084
5085 }
5086
5087
5088
5089 /*
5090 * Compute the maximum I/O size to generate against the bitmap file
5091 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
5092 *
5093 * Inputs:
5094 * hfsmp -- hfsmount to look at
5095 * bitmap_off -- bit offset into the bitmap file
5096 *
5097 * Outputs:
5098 * iosize -- iosize to generate.
5099 *
5100 * Returns:
5101 * 0 on success; EINVAL otherwise
5102 */
5103 static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
5104
5105 /*
5106 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
5107 * with 32 bit math in this function.
5108 */
5109
5110 uint32_t bitmap_len;
5111 uint32_t remaining_bitmap;
5112 uint32_t target_iosize;
5113 uint32_t bitmap_off;
5114
5115 /* Is this bit index not word aligned? If so, immediately fail. */
5116 if (bitmap_st % kBitsPerWord) {
5117 if (ALLOC_DEBUG) {
5118 panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
5119 }
5120 return EINVAL;
5121 }
5122
5123 /* bitmap_off is in bytes, not allocation blocks/bits */
5124 bitmap_off = bitmap_st / kBitsPerByte;
5125
5126 if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
5127 if (ALLOC_DEBUG) {
5128 panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
5129 }
5130 return EINVAL;
5131 }
5132
5133 /*
5134 * Also invalid if it's not at least aligned to HFS bitmap logical
5135 * block boundaries. We don't have to emit an iosize that's an
5136 * exact multiple of the VBMIOSize, but it must start on such
5137 * a boundary.
5138 *
5139 * The vcbVBMIOSize may be SMALLER than the allocation block size
5140 * on a FS with giant allocation blocks, but it will never be
5141 * greater than it, so it should be safe to start I/O
5142 * aligned on a VBMIOsize boundary.
5143 */
5144 if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
5145 if (ALLOC_DEBUG) {
5146 panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
5147 }
5148 return EINVAL;
5149 }
5150
5151 /*
5152 * Generate the total bitmap file length in bytes, then round up
5153 * that value to the end of the last allocation block, if needed (It
5154 * will probably be needed). We won't scan past the last actual
5155 * allocation block.
5156 *
5157 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
5158 * have to complete the I/O on VBMIOSize boundaries, but we can only read
5159 * up until the end of the bitmap file.
5160 */
5161 bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
5162
5163 remaining_bitmap = bitmap_len - bitmap_off;
5164
5165 /*
5166 * io size is the MIN of the maximum I/O we can generate or the
5167 * remaining amount of bitmap.
5168 */
5169 target_iosize = MIN((MAXBSIZE), remaining_bitmap);
5170 *iosize = target_iosize;
5171
5172 return 0;
5173 }
5174
5175
5176
5177
5178 /*
5179 * This function is basically the same as hfs_isallocated, except it's designed for
5180 * use with the red-black tree validation code. It assumes we're only checking whether
5181 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
5182 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
5183 *
5184 * This should not be called in general purpose scanning code.
5185 */
5186 int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
5187
5188 u_int32_t *currentWord; // Pointer to current word within bitmap block
5189 u_int32_t bitMask; // Word with given bits already set (ready to test)
5190 u_int32_t firstBit; // Bit index within word of first bit to allocate
5191 u_int32_t numBits; // Number of bits in word to allocate
5192 u_int32_t bitsPerBlock;
5193 uintptr_t blockRef = 0;
5194 u_int32_t numBlocks = 1;
5195 u_int32_t *buffer = NULL;
5196
5197 int inuse = 0;
5198 int error;
5199
5200
5201 if (bp_buf) {
5202 /* just use passed-in buffer if avail. */
5203 buffer = bp_buf;
5204 }
5205 else {
5206 /*
5207 * Pre-read the bitmap block containing the first word of allocation
5208 */
5209 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
5210 HFS_ALLOC_IGNORE_TENTATIVE);
5211 if (error)
5212 return (error);
5213 }
5214
5215 /*
5216 * Initialize currentWord, and wordsLeft.
5217 */
5218 u_int32_t wordIndexInBlock;
5219
5220 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
5221
5222 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
5223 currentWord = buffer + wordIndexInBlock;
5224
5225 /*
5226 * First test any non word aligned bits.
5227 */
5228 firstBit = startingBlock % kBitsPerWord;
5229 bitMask = kAllBitsSetInWord >> firstBit;
5230 numBits = kBitsPerWord - firstBit;
5231 if (numBits > numBlocks) {
5232 numBits = numBlocks;
5233 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
5234 }
5235 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
5236 inuse = 1;
5237 goto Exit;
5238 }
5239 ++currentWord;
5240
5241 Exit:
5242 if(bp_buf == NULL) {
5243 if (buffer) {
5244 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
5245 }
5246 }
5247 return (inuse);
5248
5249
5250
5251 }
5252
5253 /*
5254 * This function resets all of the data structures relevant to the
5255 * free extent cache stored in the hfsmount struct.
5256 *
5257 * If we are using the red-black tree code then we need to account for the fact that
5258 * we may encounter situations where we need to jettison the tree. If that is the
5259 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
5260 * the free ext cache is zeroed before we start using it.
5261 *
5262 * We also reset and disable the cache when allocLimit is updated... which
5263 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
5264 * It is independent of the type of allocator being used currently.
5265 */
5266 void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
5267 {
5268 int bytes;
5269 void *freeExt;
5270
5271 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5272 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
5273
5274 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5275
5276 /* reset Free Extent Count */
5277 hfsmp->vcbFreeExtCnt = 0;
5278
5279 /* reset the actual array */
5280 bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
5281 freeExt = (void*)(hfsmp->vcbFreeExt);
5282
5283 bzero (freeExt, bytes);
5284
5285 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5286
5287 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5288 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
5289
5290 return;
5291 }
5292
5293 /*
5294 * This function is used to inform the allocator if we have to effectively shrink
5295 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
5296 *
5297 * The bitmap lock must be held when calling this function. This function also modifies the
5298 * allocLimit field in the hfs mount point structure in the general case.
5299 *
5300 * In the shrinking case, we'll have to remove all free extents from the red-black
5301 * tree past the specified offset new_end_block. In the growth case, we'll have to force
5302 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
5303 *
5304 * new_end_block represents the total number of blocks available for allocation in the resized
5305 * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
5306 * will be out of the (0, n-1) range that are indexable in the bitmap.
5307 *
5308 * Returns 0 on success
5309 * errno on failure
5310 */
5311 u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
5312
5313 /*
5314 * Update allocLimit to the argument specified
5315 */
5316 hfsmp->allocLimit = new_end_block;
5317
5318 /* Invalidate the free extent cache completely so that
5319 * it does not have any extents beyond end of current
5320 * volume.
5321 */
5322 ResetVCBFreeExtCache(hfsmp);
5323
5324 /* Force a rebuild of the summary table. */
5325 (void) hfs_rebuild_summary (hfsmp);
5326
5327 // Delete any tentative ranges that are in the area we're shrinking
5328 struct rl_entry *range, *next_range;
5329 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
5330 rl_link, next_range) {
5331 if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP)
5332 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
5333 }
5334
5335 return 0;
5336 }
5337
5338 /*
5339 * Remove an extent from the list of free extents.
5340 *
5341 * This is a low-level routine. It does not handle overlaps or splitting;
5342 * that is the responsibility of the caller. The input extent must exactly
5343 * match an extent already in the list; it will be removed, and any following
5344 * extents in the list will be shifted up.
5345 *
5346 * Inputs:
5347 * startBlock - Start of extent to remove
5348 * blockCount - Number of blocks in extent to remove
5349 *
5350 * Result:
5351 * The index of the extent that was removed.
5352 */
5353 static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
5354 {
5355 if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
5356 if (ALLOC_DEBUG)
5357 panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
5358 else
5359 printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
5360 return;
5361 }
5362 int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
5363 if (shift_count > 0) {
5364 memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
5365 }
5366 hfsmp->vcbFreeExtCnt--;
5367 }
5368
5369
5370 /*
5371 * Add an extent to the list of free extents.
5372 *
5373 * This is a low-level routine. It does not handle overlaps or coalescing;
5374 * that is the responsibility of the caller. This routine *does* make
5375 * sure that the extent it is adding is inserted in the correct location.
5376 * If the list is full, this routine will handle either removing the last
5377 * extent in the list to make room for the new extent, or ignoring the
5378 * new extent if it is "worse" than the last extent in the list.
5379 *
5380 * Inputs:
5381 * startBlock - Start of extent to add
5382 * blockCount - Number of blocks in extent to add
5383 *
5384 * Result:
5385 * The index where the extent that was inserted, or kMaxFreeExtents
5386 * if the extent was not inserted (the list was full, and the extent
5387 * being added was "worse" than everything in the list).
5388 */
5389 static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5390 {
5391 uint32_t i;
5392
5393 /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
5394 if (ALLOC_DEBUG) {
5395 uint32_t endBlock = startBlock + blockCount;
5396 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5397 if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
5398 startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
5399 continue;
5400 }
5401 panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
5402 hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
5403 }
5404 }
5405
5406 /* Figure out what index the new extent should be inserted at. */
5407 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5408 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5409 /* The list is sorted by increasing offset. */
5410 if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
5411 break;
5412 }
5413 } else {
5414 /* The list is sorted by decreasing size. */
5415 if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
5416 break;
5417 }
5418 }
5419 }
5420
5421 /* When we get here, i is the index where the extent should be inserted. */
5422 if (i == kMaxFreeExtents) {
5423 /*
5424 * The new extent is worse than anything already in the list,
5425 * and the list is full, so just ignore the extent to be added.
5426 */
5427 return i;
5428 }
5429
5430 /*
5431 * Grow the list (if possible) to make room for an insert.
5432 */
5433 if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
5434 hfsmp->vcbFreeExtCnt++;
5435
5436 /*
5437 * If we'll be keeping any extents after the insert position, then shift them.
5438 */
5439 int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
5440 if (shift_count > 0) {
5441 memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
5442 }
5443
5444 /* Finally, store the new extent at its correct position. */
5445 hfsmp->vcbFreeExt[i].startBlock = startBlock;
5446 hfsmp->vcbFreeExt[i].blockCount = blockCount;
5447 return i;
5448 }
5449
5450
5451 /*
5452 * Remove an entry from free extent cache after it has been allocated.
5453 *
5454 * This is a high-level routine. It handles removing a portion of a
5455 * cached extent, potentially splitting it into two (if the cache was
5456 * already full, throwing away the extent that would sort last). It
5457 * also handles removing an extent that overlaps multiple extents in
5458 * the cache.
5459 *
5460 * Inputs:
5461 * hfsmp - mount point structure
5462 * startBlock - starting block of the extent to be removed.
5463 * blockCount - number of blocks of the extent to be removed.
5464 */
5465 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5466 {
5467 u_int32_t i, insertedIndex;
5468 u_int32_t currentStart, currentEnd, endBlock;
5469 int extentsRemoved = 0;
5470
5471 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5472 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
5473
5474 endBlock = startBlock + blockCount;
5475
5476 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5477
5478 /*
5479 * Iterate over all of the extents in the free extent cache, removing or
5480 * updating any entries that overlap with the input extent.
5481 */
5482 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5483 currentStart = hfsmp->vcbFreeExt[i].startBlock;
5484 currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
5485
5486 /*
5487 * If the current extent is entirely before or entirely after the
5488 * the extent to be removed, then we keep it as-is.
5489 */
5490 if (currentEnd <= startBlock || currentStart >= endBlock) {
5491 continue;
5492 }
5493
5494 /*
5495 * If the extent being removed entirely contains the current extent,
5496 * then remove the current extent.
5497 */
5498 if (startBlock <= currentStart && endBlock >= currentEnd) {
5499 remove_free_extent_list(hfsmp, i);
5500
5501 /*
5502 * We just removed the extent at index i. The extent at
5503 * index i+1 just got shifted to index i. So decrement i
5504 * to undo the loop's "++i", and the next iteration will
5505 * examine index i again, which contains the next extent
5506 * in the list.
5507 */
5508 --i;
5509 ++extentsRemoved;
5510 continue;
5511 }
5512
5513 /*
5514 * If the extent being removed is strictly "in the middle" of the
5515 * current extent, then we need to split the current extent into
5516 * two discontiguous extents (the "head" and "tail"). The good
5517 * news is that we don't need to examine any other extents in
5518 * the list.
5519 */
5520 if (startBlock > currentStart && endBlock < currentEnd) {
5521 remove_free_extent_list(hfsmp, i);
5522 add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
5523 add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
5524 break;
5525 }
5526
5527 /*
5528 * The only remaining possibility is that the extent to be removed
5529 * overlaps the start or end (but not both!) of the current extent.
5530 * So we need to replace the current extent with a shorter one.
5531 *
5532 * The only tricky part is that the updated extent might be at a
5533 * different index than the original extent. If the updated extent
5534 * was inserted after the current extent, then we need to re-examine
5535 * the entry at index i, since it now contains the extent that was
5536 * previously at index i+1. If the updated extent was inserted
5537 * before or at the same index as the removed extent, then the
5538 * following extents haven't changed position.
5539 */
5540 remove_free_extent_list(hfsmp, i);
5541 if (startBlock > currentStart) {
5542 /* Remove the tail of the current extent. */
5543 insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
5544 } else {
5545 /* Remove the head of the current extent. */
5546 insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
5547 }
5548 if (insertedIndex > i) {
5549 --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */
5550 }
5551 }
5552
5553 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5554
5555 sanity_check_free_ext(hfsmp, 0);
5556
5557 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5558 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
5559
5560 return;
5561 }
5562
5563
5564 /*
5565 * Add an entry to free extent cache after it has been deallocated.
5566 *
5567 * This is a high-level routine. It will merge overlapping or contiguous
5568 * extents into a single, larger extent.
5569 *
5570 * If the extent provided has blocks beyond current allocLimit, it is
5571 * clipped to allocLimit (so that we won't accidentally find and allocate
5572 * space beyond allocLimit).
5573 *
5574 * Inputs:
5575 * hfsmp - mount point structure
5576 * startBlock - starting block of the extent to be removed.
5577 * blockCount - number of blocks of the extent to be removed.
5578 *
5579 * Returns:
5580 * true - if the extent was added successfully to the list
5581 * false - if the extent was not added to the list, maybe because
5582 * the extent was beyond allocLimit, or is not best
5583 * candidate to be put in the cache.
5584 */
5585 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5586 {
5587 Boolean retval = false;
5588 uint32_t endBlock;
5589 uint32_t currentEnd;
5590 uint32_t i;
5591
5592 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5593 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
5594
5595 #if DEBUG
5596 for (i = 0; i < 2; ++i) {
5597 struct rl_entry *range;
5598 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
5599 hfs_assert(rl_overlap(range, startBlock,
5600 startBlock + blockCount - 1) == RL_NOOVERLAP);
5601 }
5602 }
5603 #endif
5604
5605 /* No need to add extent that is beyond current allocLimit */
5606 if (startBlock >= hfsmp->allocLimit) {
5607 goto out_not_locked;
5608 }
5609
5610 /* If end of the free extent is beyond current allocLimit, clip the extent */
5611 if ((startBlock + blockCount) > hfsmp->allocLimit) {
5612 blockCount = hfsmp->allocLimit - startBlock;
5613 }
5614
5615 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5616
5617 /*
5618 * Make a pass through the free extent cache, looking for known extents that
5619 * overlap or are contiguous with the extent to be added. We'll remove those
5620 * extents from the cache, and incorporate them into the new extent to be added.
5621 */
5622 endBlock = startBlock + blockCount;
5623 for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
5624 currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
5625 if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
5626 /* Extent i does not overlap and is not contiguous, so keep it. */
5627 continue;
5628 } else {
5629 /* We need to remove extent i and combine it with the input extent. */
5630 if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
5631 startBlock = hfsmp->vcbFreeExt[i].startBlock;
5632 if (currentEnd > endBlock)
5633 endBlock = currentEnd;
5634
5635 remove_free_extent_list(hfsmp, i);
5636 /*
5637 * We just removed the extent at index i. The extent at
5638 * index i+1 just got shifted to index i. So decrement i
5639 * to undo the loop's "++i", and the next iteration will
5640 * examine index i again, which contains the next extent
5641 * in the list.
5642 */
5643 --i;
5644 }
5645 }
5646 add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
5647
5648 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5649
5650 out_not_locked:
5651 sanity_check_free_ext(hfsmp, 0);
5652
5653 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5654 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
5655
5656 return retval;
5657 }
5658
5659 /* Debug function to check if the free extent cache is good or not */
5660 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
5661 {
5662 u_int32_t i, j;
5663
5664 /* Do not do anything if debug is not on */
5665 if (ALLOC_DEBUG == 0) {
5666 return;
5667 }
5668
5669 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5670
5671 if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
5672 panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
5673
5674 /*
5675 * Iterate the Free extent cache and ensure no entries are bogus or refer to
5676 * allocated blocks.
5677 */
5678 for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
5679 u_int32_t start, nblocks;
5680
5681 start = hfsmp->vcbFreeExt[i].startBlock;
5682 nblocks = hfsmp->vcbFreeExt[i].blockCount;
5683
5684 /* Check if any of the blocks in free extent cache are allocated.
5685 * This should not be enabled always because it might take
5686 * very long for large extents that get added to the list.
5687 *
5688 * We have to drop vcbFreeExtLock while we call hfs_isallocated
5689 * because it is going to do I/O. Note that the free extent
5690 * cache could change. That's a risk we take when using this
5691 * debugging code. (Another alternative would be to try to
5692 * detect when the free extent cache changed, and perhaps
5693 * restart if the list changed while we dropped the lock.)
5694 */
5695 if (check_allocated) {
5696 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5697 if (hfs_isallocated(hfsmp, start, nblocks)) {
5698 panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
5699 hfsmp, i, start, nblocks);
5700 }
5701 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5702 }
5703
5704 /* Check if any part of the extent is beyond allocLimit */
5705 if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
5706 panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
5707 hfsmp, i, start, nblocks, hfsmp->allocLimit);
5708 }
5709
5710 /* Check if there are any duplicate start blocks */
5711 for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
5712 if (start == hfsmp->vcbFreeExt[j].startBlock) {
5713 panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
5714 hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
5715 hfsmp->vcbFreeExt[j].blockCount);
5716 }
5717 }
5718
5719 /* Check if the entries are out of order */
5720 if ((i+1) != hfsmp->vcbFreeExtCnt) {
5721 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5722 /* sparse devices are sorted by starting block number (ascending) */
5723 if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
5724 panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5725 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5726 hfsmp->vcbFreeExt[i+1].blockCount);
5727 }
5728 } else {
5729 /* normally sorted by block count (descending) */
5730 if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
5731 panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5732 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5733 hfsmp->vcbFreeExt[i+1].blockCount);
5734 }
5735 }
5736 }
5737 }
5738 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5739 }
5740
5741 #define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit))
5742
5743 static int clzll(uint64_t x)
5744 {
5745 if (x == 0)
5746 return 64;
5747 else
5748 return __builtin_clzll(x);
5749 }
5750
5751 #if !HFS_ALLOC_TEST
5752
5753 static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
5754 {
5755 uint32_t start_bit;
5756 uint32_t iosize = 0;
5757 uint32_t byte_offset;
5758 uint32_t last_bitmap_block;
5759 int error;
5760 struct hfsmount *hfsmp = bitmap_ctx->hfsmp;
5761 #if !HFS_ALLOC_TEST
5762 uint64_t lock_elapsed;
5763 #endif
5764
5765
5766 if (bitmap_ctx->bp)
5767 ReleaseScanBitmapRange(bitmap_ctx->bp);
5768
5769 if (msleep(NULL, NULL, PINOD | PCATCH,
5770 "hfs_fsinfo", NULL) == EINTR) {
5771 return EINTR;
5772 }
5773
5774 #if !HFS_ALLOC_TEST
5775 /*
5776 * Let someone else use the allocation map after we've processed over HFS_FSINFO_MAX_LOCKHELD_TIME .
5777 * lock_start is initialized in hfs_find_free_extents().
5778 */
5779 absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed);
5780
5781 if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) {
5782
5783 hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags);
5784
5785 /* add tsleep here to force context switch and fairness */
5786 tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1);
5787
5788 hfs_journal_lock(hfsmp);
5789
5790 /* Flush the journal and wait for all I/Os to finish up */
5791 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
5792 if (error) {
5793 hfs_journal_unlock(hfsmp);
5794 return error;
5795 }
5796
5797 /*
5798 * Take bitmap lock to ensure it is not being modified while journal is still held.
5799 * Since we are reading larger than normal blocks from the bitmap, which
5800 * might confuse other parts of the bitmap code using normal blocks, we
5801 * take exclusive lock here.
5802 */
5803 bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5804
5805 bitmap_ctx->lock_start = mach_absolute_time();
5806
5807 /* Release the journal lock */
5808 hfs_journal_unlock(hfsmp);
5809
5810 /*
5811 * Bitmap is read in large block size (up to 1MB),
5812 * unlike the runtime which reads the bitmap in the
5813 * 4K block size. If the bitmap is read by both ways
5814 * at the same time, it can result in multiple buf_t with
5815 * different sizes and potentially case data corruption.
5816 * To avoid this, we invalidate all the existing buffers
5817 * associated with the bitmap vnode.
5818 */
5819 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
5820 if (error) {
5821 /* hfs_systemfile_unlock will be called in the caller */
5822 return error;
5823 }
5824 }
5825 #endif
5826
5827 start_bit = bitmap_ctx->run_offset;
5828
5829 if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) {
5830 bitmap_ctx->chunk_end = 0;
5831 bitmap_ctx->bp = NULL;
5832 bitmap_ctx->bitmap = NULL;
5833 return 0;
5834 }
5835
5836 hfs_assert(start_bit % 8 == 0);
5837
5838 /*
5839 * Compute how much I/O we should generate here.
5840 * hfs_scan_range_size will validate that the start bit
5841 * converted into a byte offset into the bitmap file,
5842 * is aligned on a VBMIOSize boundary.
5843 */
5844 error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize);
5845 if (error)
5846 return error;
5847
5848 hfs_assert(iosize != 0);
5849
5850 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
5851 byte_offset = start_bit / kBitsPerByte;
5852
5853 /*
5854 * When the journal replays blocks, it does so by writing directly to the disk
5855 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
5856 * it also immediately re-reads and invalidates the range covered by the bp so
5857 * it does not leave anything lingering in the cache (for iosize reasons).
5858 *
5859 * As such, it is safe to do large I/Os here with ReadBitmapRange.
5860 *
5861 * NOTE: It is not recommended, but it is possible to call the function below
5862 * on sections of the bitmap that may be in core already as long as the pages are not
5863 * dirty. In that case, we'd notice that something starting at that
5864 * logical block of the bitmap exists in the metadata cache, and we'd check
5865 * if the iosize requested is the same as what was already allocated for it.
5866 * Odds are pretty good we're going to request something larger. In that case,
5867 * we just free the existing memory associated with the buf and reallocate a
5868 * larger range. This function should immediately invalidate it as soon as we're
5869 * done scanning, so this shouldn't cause any coherency issues.
5870 */
5871 error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp);
5872 if (error)
5873 return error;
5874
5875 /*
5876 * At this point, we have a giant wired buffer that represents some portion of
5877 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
5878 * bytes though, so clip our ending bit to what we actually read in.
5879 */
5880 last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte;
5881
5882 /* Cap the last block to the total number of blocks if required */
5883 if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks)
5884 last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks;
5885
5886 bitmap_ctx->chunk_current = 0; // new chunk of bitmap
5887 bitmap_ctx->chunk_end = last_bitmap_block - start_bit;
5888
5889 return 0;
5890 }
5891
5892 #endif // !HFS_ALLOC_TEST
5893
5894 // Returns number of contiguous bits set at start
5895 static int bit_count_set(void *bitmap, int start, int end)
5896 {
5897 if (start == end)
5898 return 0;
5899
5900 hfs_assert(end > start);
5901
5902 const int start_bit = start & 63;
5903 const int end_bit = end & 63;
5904
5905 uint64_t *p = (uint64_t *)bitmap + start / 64;
5906 uint64_t x = ~OSSwapBigToHostInt64(*p);
5907
5908 if ((start & ~63) == (end & ~63)) {
5909 // Start and end in same 64 bits
5910 x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
5911 return clzll(x) - start_bit;
5912 }
5913
5914 // Deal with initial unaligned bit
5915 x &= BIT_RIGHT_MASK(start_bit);
5916
5917 if (x)
5918 return clzll(x) - start_bit;
5919
5920 // Go fast
5921 ++p;
5922 int count = 64 - start_bit;
5923 int nquads = (end - end_bit - start - 1) / 64;
5924
5925 while (nquads--) {
5926 if (*p != 0xffffffffffffffffull) {
5927 x = ~OSSwapBigToHostInt64(*p);
5928 return count + clzll(x);
5929 }
5930 ++p;
5931 count += 64;
5932 }
5933
5934 if (end_bit) {
5935 x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
5936 count += clzll(x);
5937 }
5938
5939 return count;
5940 }
5941
5942 /* Returns the number of a run of cleared bits:
5943 * bitmap is a single chunk of memory being examined
5944 * start: the start bit relative to the current buffer to be examined; start is inclusive.
5945 * end: the end bit relative to the current buffer to be examined; end is not inclusive.
5946 */
5947 static int bit_count_clr(void *bitmap, int start, int end)
5948 {
5949 if (start == end)
5950 return 0;
5951
5952 hfs_assert(end > start);
5953
5954 const int start_bit = start & 63;
5955 const int end_bit = end & 63;
5956
5957 uint64_t *p = (uint64_t *)bitmap + start / 64;
5958 uint64_t x = OSSwapBigToHostInt64(*p);
5959
5960 if ((start & ~63) == (end & ~63)) {
5961 // Start and end in same 64 bits
5962 x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
5963
5964 return clzll(x) - start_bit;
5965 }
5966
5967 // Deal with initial unaligned bit
5968 x &= BIT_RIGHT_MASK(start_bit);
5969
5970 if (x)
5971 return clzll(x) - start_bit;
5972
5973 // Go fast
5974 ++p;
5975 int count = 64 - start_bit;
5976 int nquads = (end - end_bit - start - 1) / 64;
5977
5978 while (nquads--) {
5979 if (*p) {
5980 x = OSSwapBigToHostInt64(*p);
5981 return count + clzll(x);
5982 }
5983 ++p;
5984 count += 64;
5985 }
5986
5987 if (end_bit) {
5988 x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
5989
5990 count += clzll(x);
5991 }
5992
5993 return count;
5994 }
5995
5996 #if !HFS_ALLOC_TEST
5997 static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set)
5998 {
5999 uint32_t end, start_summary_bit, end_summary_bit;
6000 errno_t error = 0;
6001
6002 if (count == 0)
6003 goto out;
6004
6005 if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE))
6006 return 0;
6007
6008 if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) {
6009 error = EINVAL;
6010 goto out;
6011 }
6012
6013 end = start + count - 1;
6014 if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) {
6015 error = EINVAL;
6016 goto out;
6017 }
6018
6019 // if summary table bit has been updated with free block previously, leave it.
6020 if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set)
6021 start_summary_bit++;
6022
6023 for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++)
6024 hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set);
6025
6026 if (!set)
6027 bitmap_ctx->last_free_summary_bit = end_summary_bit;
6028
6029 out:
6030 return error;
6031
6032 }
6033 #endif //!HFS_ALLOC_TEST
6034
6035 /*
6036 * Read in chunks of the bitmap into memory, and find a run of cleared/set bits;
6037 * the run can extend across chunk boundaries.
6038 * bit_count_clr can be passed to get a run of cleared bits.
6039 * bit_count_set can be passed to get a run of set bits.
6040 */
6041 static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count)
6042 {
6043 int count;
6044 errno_t error = 0;
6045
6046 *bit_count = 0;
6047
6048 do {
6049 if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) {
6050 if ((error = get_more_bits(bitmap_ctx)) != 0)
6051 goto out;
6052 }
6053
6054 if (bitmap_ctx->chunk_end == 0)
6055 break;
6056
6057 count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end);
6058
6059 bitmap_ctx->run_offset += count;
6060 bitmap_ctx->chunk_current += count;
6061 *bit_count += count;
6062
6063 } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count);
6064
6065 out:
6066 return error;
6067
6068 }
6069
6070 // Returns count of number of bits clear
6071 static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count)
6072 {
6073 return hfs_bit_count(bitmap_ctx, bit_count_clr, count);
6074 }
6075
6076 // Returns count of number of bits set
6077 static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count)
6078 {
6079 return hfs_bit_count(bitmap_ctx, bit_count_set, count);
6080 }
6081
6082 static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx)
6083 {
6084 return bitmap_ctx->run_offset;
6085 }
6086
6087 /*
6088 * Perform a full scan of the bitmap file.
6089 * Note: during the scan of bitmap file, it may drop and reacquire the
6090 * bitmap lock to let someone else use the bitmap for fairness.
6091 * Currently it is used by HFS_GET_FSINFO statistic gathing, which
6092 * is run while other processes might perform HFS operations.
6093 */
6094
6095 errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
6096 void (*callback)(void *data, off_t free_extent_size), void *callback_arg)
6097 {
6098 struct bitmap_context bitmap_ctx;
6099 uint32_t count;
6100 errno_t error = 0;
6101
6102 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
6103 error = hfs_init_summary(hfsmp);
6104 if (error)
6105 return error;
6106 }
6107
6108 bzero(&bitmap_ctx, sizeof(struct bitmap_context));
6109
6110 /*
6111 * The journal maintains list of recently deallocated blocks to
6112 * issue DKIOCUNMAPs when the corresponding journal transaction is
6113 * flushed to the disk. To avoid any race conditions, we only
6114 * want one active trim list. Therefore we make sure that the
6115 * journal trim list is sync'ed, empty, and not modifiable for
6116 * the duration of our scan.
6117 *
6118 * Take the journal lock before flushing the journal to the disk.
6119 * We will keep on holding the journal lock till we don't get the
6120 * bitmap lock to make sure that no new journal transactions can
6121 * start. This will make sure that the journal trim list is not
6122 * modified after the journal flush and before getting bitmap lock.
6123 * We can release the journal lock after we acquire the bitmap
6124 * lock as it will prevent any further block deallocations.
6125 */
6126 hfs_journal_lock(hfsmp);
6127
6128 /* Flush the journal and wait for all I/Os to finish up */
6129 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
6130 if (error) {
6131 hfs_journal_unlock(hfsmp);
6132 return error;
6133 }
6134
6135 /*
6136 * Take bitmap lock to ensure it is not being modified.
6137 * Since we are reading larger than normal blocks from the bitmap, which
6138 * might confuse other parts of the bitmap code using normal blocks, we
6139 * take exclusive lock here.
6140 */
6141 bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6142
6143 #if !HFS_ALLOC_TEST
6144 bitmap_ctx.lock_start = mach_absolute_time();
6145 #endif
6146
6147 /* Release the journal lock */
6148 hfs_journal_unlock(hfsmp);
6149
6150 /*
6151 * Bitmap is read in large block size (up to 1MB),
6152 * unlike the runtime which reads the bitmap in the
6153 * 4K block size. If the bitmap is read by both ways
6154 * at the same time, it can result in multiple buf_t with
6155 * different sizes and potentially case data corruption.
6156 * To avoid this, we invalidate all the existing buffers
6157 * associated with the bitmap vnode.
6158 */
6159 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
6160 if (error)
6161 goto out;
6162
6163 /*
6164 * Get the list of all free extent ranges. hfs_alloc_scan_range()
6165 * will call hfs_fsinfo_data_add() to account for all the free
6166 * extent ranges found during scan.
6167 */
6168 bitmap_ctx.hfsmp = hfsmp;
6169 bitmap_ctx.run_offset = 0;
6170
6171 while (bitmap_ctx.run_offset < hfsmp->totalBlocks) {
6172
6173 uint32_t start = hfs_bit_offset(&bitmap_ctx);
6174
6175 if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0)
6176 goto out;
6177
6178 if (count)
6179 callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize));
6180
6181 if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0)
6182 goto out;
6183
6184 start = hfs_bit_offset(&bitmap_ctx);
6185
6186 if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0)
6187 goto out;
6188
6189 if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0)
6190 goto out;
6191 }
6192
6193 out:
6194 if (bitmap_ctx.lockflags) {
6195 hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags);
6196 }
6197
6198 return error;
6199 }
6200