1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 File: VolumeAllocation.c
30
31 Contains: Routines for accessing and modifying the volume bitmap.
32
33 Version: HFS Plus 1.0
34
35 Copyright: © 1996-2009 by Apple Computer, Inc., all rights reserved.
36
37 */
38
39 /*
40 Public routines:
41 BlockAllocate / hfs_block_alloc
42 Allocate space on a volume. Can allocate space contiguously.
43 If not contiguous, then allocation may be less than what was
44 asked for. Returns the starting block number, and number of
45 blocks. It will only return a single extent.
46
47 BlockDeallocate
48 Deallocate a contiguous run of allocation blocks.
49
50 BlockMarkAllocated
51 Exported wrapper to mark blocks as in-use. This will correctly determine
52 whether or not the red-black tree is enabled and call the appropriate function
53 if applicable.
54 BlockMarkFree
55 Exported wrapper to mark blocks as freed. This will correctly determine whether or
56 not the red-black tree is enabled and call the appropriate function if applicable.
57
58
59 ResetVCBFreeExtCache
60 Since the red-black tree obviates the need to maintain the free extent cache, we do
61 not update it if the tree is also live. As a result, if we ever need to destroy the trees
62 we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
63 bitmap scanning allocator.
64 We also reset and disable the free extent cache when volume resizing is
65 in flight.
66
67 UpdateAllocLimit
68 Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
69 allocations from occupying space in the region we are modifying during a filesystem resize.
70 At other times, it should be consistent with the total number of allocation blocks in the
71 filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
72 know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
73 number of items in the tree that we can allocate from.
74
75 ScanUnmapBlocks
76 Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it
77 tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core
78 summary table of the allocation bitmap.
79
80 Internal routines:
81 BlockMarkFreeInternal
82 Mark a contiguous range of blocks as free. The corresponding
83 bits in the volume bitmap will be cleared. This will actually do the work
84 of modifying the bitmap for us.
85
86 BlockMarkAllocatedInternal
87 Mark a contiguous range of blocks as allocated. The cor-
88 responding bits in the volume bitmap are set. Also tests to see
89 if any of the blocks were previously unallocated.
90 BlockFindContiguous
91 Find a contiguous range of blocks of a given size. The caller
92 specifies where to begin the search (by block number). The
93 block number of the first block in the range is returned. This is only
94 called by the bitmap scanning logic as the red-black tree should be able
95 to do this internally by searching its tree.
96 BlockFindAny
97 Find and allocate a contiguous range of blocks up to a given size. The
98 first range of contiguous free blocks found is allocated, even if there
99 are fewer blocks than requested (and even if a contiguous range of blocks
100 of the given size exists elsewhere).
101 BlockFindAnyBitmap
102 Finds a range of blocks per the above requirements without using the
103 Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
104 any valid range of free space needed.
105 BlockFindContig
106 Find a contiguous range of blocks of a given size.
107 If the minimum cannot be satisfied, nothing is
108 returned.
109 BlockFindKnown
110 Try to allocate space from known free space in the volume's
111 free extent cache.
112 ReadBitmapBlock
113 Given an allocation block number, read the bitmap block that
114 contains that allocation block into a caller-supplied buffer.
115
116 ReleaseBitmapBlock
117 Release a bitmap block back into the buffer cache.
118
119 ReadBitmapRange
120 Given an allocation block number, read a range of bitmap that
121 must begin at that allocation block into a caller supplied buffer.
122
123 ReleaseBitmapRange
124 Release and invalidate a buf_t corresponding to the bitmap
125 back into the UBC in order to prevent coherency issues.
126
127 remove_free_extent_cache
128 Remove an extent from the free extent cache. Handles overlaps
129 with multiple extents in the cache, and handles splitting an
130 extent in the cache if the extent to be removed is in the middle
131 of a cached extent.
132
133 add_free_extent_cache
134 Add an extent to the free extent cache. It will merge the
135 input extent with extents already in the cache.
136 CheckUnmappedBytes
137 Check whether or not the current transaction
138 has allocated blocks that were recently freed. This may have data safety implications.
139
140
141
142 Debug/Test Routines
143 hfs_isallocated
144 Test to see if any blocks in a range are allocated. Journal or
145 allocation file lock must be held.
146
147 hfs_isallocated_scan
148 Test to see if any blocks in a range are allocated. Releases and
149 invalidates the block used when finished.
150
151 Optimization Routines
152 hfs_alloc_scan_block
153 Given a starting allocation block number, figures out which physical block contains that
154 allocation block's bit, and scans it from the starting bit until either the ending bit or
155 the end of the block. Free space extents are inserted into the appropriate red-black tree.
156
157 */
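/*
 * Illustrative sketch (not compiled): a typical call pair using the public
 * allocation entry points summarized above. Error handling, transaction
 * setup, and the locks real callers must hold are elided; the flag and
 * block-count choices here are examples only.
 */
#if 0
	u_int32_t actualStart, actualCount;
	OSErr err;

	/* Ask for at least 1 and at most 8 contiguous allocation blocks. */
	err = BlockAllocate(hfsmp, 0 /* no preferred start */, 1, 8,
	                    HFS_ALLOC_FORCECONTIG, &actualStart, &actualCount);
	if (err == noErr) {
		/* ... use the extent, then release it ... */
		(void) BlockDeallocate(hfsmp, actualStart, actualCount, 0);
	}
#endif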
158
159
160 #include <sys/types.h>
161 #include <sys/buf.h>
162
163 #if !HFS_ALLOC_TEST
164
165 #include "../../hfs_macos_defs.h"
166 #include <sys/systm.h>
167 #include <sys/ubc.h>
168 #include <kern/kalloc.h>
169 /* For VM Page size */
170 #include <libkern/libkern.h>
171 #include <vfs/vfs_journal.h>
172 #include "../../hfs.h"
173 #include "../../hfs_endian.h"
174 #include "../headers/FileMgrInternal.h"
175
176 #endif // !HFS_ALLOC_TEST
177
178 #include <sys/sysctl.h>
179 #include <sys/disk.h>
180 #include <sys/uio.h>
181 #include <sys/malloc.h>
182
183 #include "../../hfs_dbg.h"
184 #include "../../hfs_format.h"
185 #include "../../hfs_kdebug.h"
186 #include "../../rangelist.h"
187 #include "../../hfs_extents.h"
188
189 /* Headers for unmap-on-mount support */
190 #include <sys/disk.h>
191
192 #ifndef CONFIG_HFS_TRIM
193 #define CONFIG_HFS_TRIM 0
194 #endif
195
196 /*
197 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
198 * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
199 * disabled by default because there can be a lot of these events,
200 * and we don't want to overwhelm the kernel debug buffer. If you
201 * want to watch these events in particular, just set the sysctl.)
202 */
203 static int hfs_kdebug_allocation = 0;
204 SYSCTL_DECL(_vfs_generic);
205 SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
206 SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
207 SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
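/*
 * Example (illustrative, from user space): enable all four event classes
 * below with
 *     sysctl vfs.generic.hfs.kdebug.allocation=15
 * since 1|2|4|8 = 15; set it to a subset to watch only some classes.
 */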
208 enum {
209 /*
210 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
211 * BlockDeallocate, including the internal BlockAllocateXxx
212 * routines so we can see how an allocation was satisfied.
213 *
214 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
215 * free extent cache.
216 *
217 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
218 *
219 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
220 * or clearing bits, scanning the bitmap).
221 */
222 HFSDBG_ALLOC_ENABLED = 1,
223 HFSDBG_EXT_CACHE_ENABLED = 2,
224 HFSDBG_UNMAP_ENABLED = 4,
225 HFSDBG_BITMAP_ENABLED = 8
226 };
227
228 enum {
229 kBytesPerWord = 4,
230 kBitsPerByte = 8,
231 kBitsPerWord = 32,
232
233 kBitsWithinWordMask = kBitsPerWord-1
234 };
235
236 #define kLowBitInWordMask 0x00000001ul
237 #define kHighBitInWordMask 0x80000000ul
238 #define kAllBitsSetInWord 0xFFFFFFFFul
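/*
 * Worked example (illustrative): allocation block 70 lives in bitmap word
 * 70 / kBitsPerWord = 2, at bit 70 & kBitsWithinWordMask = 6 within that
 * word. The most significant bit of a (big-endian) bitmap word covers the
 * lowest-numbered block in that word, so block 70's mask is
 * kHighBitInWordMask >> 6.
 */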
239
240 #define HFS_MIN_SUMMARY_BLOCKSIZE 4096
241
242 #define ALLOC_DEBUG 0
243
244 static OSErr ReadBitmapBlock(
245 ExtendedVCB *vcb,
246 u_int32_t bit,
247 u_int32_t **buffer,
248 uintptr_t *blockRef,
249 hfs_block_alloc_flags_t flags);
250
251 static OSErr ReleaseBitmapBlock(
252 ExtendedVCB *vcb,
253 uintptr_t blockRef,
254 Boolean dirty);
255
256 static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
257 HFSPlusExtentDescriptor *extent,
258 hfs_block_alloc_flags_t flags,
259 hfs_alloc_extra_args_t *ap);
260
261 static OSErr BlockFindAny(
262 ExtendedVCB *vcb,
263 u_int32_t startingBlock,
264 u_int32_t endingBlock,
265 u_int32_t maxBlocks,
266 hfs_block_alloc_flags_t flags,
267 Boolean trustSummary,
268 u_int32_t *actualStartBlock,
269 u_int32_t *actualNumBlocks);
270
271 static OSErr BlockFindAnyBitmap(
272 ExtendedVCB *vcb,
273 u_int32_t startingBlock,
274 u_int32_t endingBlock,
275 u_int32_t maxBlocks,
276 hfs_block_alloc_flags_t flags,
277 u_int32_t *actualStartBlock,
278 u_int32_t *actualNumBlocks);
279
280 static OSErr BlockFindContig(
281 ExtendedVCB *vcb,
282 u_int32_t startingBlock,
283 u_int32_t minBlocks,
284 u_int32_t maxBlocks,
285 hfs_block_alloc_flags_t flags,
286 u_int32_t *actualStartBlock,
287 u_int32_t *actualNumBlocks);
288
289 static OSErr BlockFindContiguous(
290 ExtendedVCB *vcb,
291 u_int32_t startingBlock,
292 u_int32_t endingBlock,
293 u_int32_t minBlocks,
294 u_int32_t maxBlocks,
295 Boolean useMetaZone,
296 Boolean trustSummary,
297 u_int32_t *actualStartBlock,
298 u_int32_t *actualNumBlocks,
299 hfs_block_alloc_flags_t flags);
300
301 static OSErr BlockFindKnown(
302 ExtendedVCB *vcb,
303 u_int32_t maxBlocks,
304 u_int32_t *actualStartBlock,
305 u_int32_t *actualNumBlocks);
306
307 static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
308 HFSPlusExtentDescriptor *extent,
309 uint32_t max_blocks,
310 hfs_block_alloc_flags_t flags);
311
312 static OSErr BlockMarkAllocatedInternal (
313 ExtendedVCB *vcb,
314 u_int32_t startingBlock,
315 u_int32_t numBlocks,
316 hfs_block_alloc_flags_t flags);
317
318 static OSErr BlockMarkFreeInternal(
319 ExtendedVCB *vcb,
320 u_int32_t startingBlock,
321 u_int32_t numBlocks,
322 Boolean do_validate);
323
324
325 static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
326 uint32_t **buffer, struct buf **blockRef);
327
328 static OSErr ReleaseScanBitmapRange( struct buf *bp );
329
330 static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
331 u_int32_t numBlocks, struct jnl_trim_list *list);
332
333 static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
334
335 static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
336 u_int32_t startbit,
337 u_int32_t *bitToScan,
338 struct jnl_trim_list *list);
339
340 static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
341 static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
342
343 /* Bitmap Re-use Detection */
344 static inline int extents_overlap (uint32_t start1, uint32_t len1,
345 uint32_t start2, uint32_t len2) {
346 return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) );
347 }
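/*
 * Example (illustrative): extents_overlap(100, 10, 109, 5) is true because
 * blocks 100..109 and 109..113 share block 109, while
 * extents_overlap(100, 10, 110, 5) is false since those ranges only abut.
 */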
348
349
350 int hfs_isallocated_scan (struct hfsmount *hfsmp,
351 u_int32_t startingBlock,
352 u_int32_t *bp_buf);
353
354 /* Summary Table Functions */
355 static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
356 static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
357 static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
358 static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
359 static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
360 static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
361 static int hfs_rebuild_summary (struct hfsmount *hfsmp);
362
363 #if 0
364 static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
365 #endif
366
367 /* Used in external mount code to initialize the summary table */
368 int hfs_init_summary (struct hfsmount *hfsmp);
369
370 #if ALLOC_DEBUG
371 void hfs_validate_summary (struct hfsmount *hfsmp);
372 #endif
373
374
375 /* Functions for manipulating free extent cache */
376 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
377 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
378 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
379
380 static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
381
382 /* Functions for getting free extents */
383
384 typedef struct bitmap_context {
385 void *bitmap; // current bitmap chunk
386 uint32_t run_offset; // offset (in bits) from start of bitmap to start of current run
387 uint32_t chunk_current; // next bit to scan in the chunk
388 uint32_t chunk_end; // number of valid bits in this chunk
389 struct hfsmount *hfsmp;
390 struct buf *bp;
391 uint32_t last_free_summary_bit; // last marked free summary bit
392 int lockflags;
393 uint64_t lock_start;
394 } bitmap_context_t;
395
396
397 static errno_t get_more_bits(bitmap_context_t *bitmap_ctx);
398 static int bit_count_set(void *bitmap, int start, int end);
399 static int bit_count_clr(void *bitmap, int start, int end);
400 static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count);
401 static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count);
402 static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count);
403 static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set);
404 static int clzll(uint64_t x);
405
406 #if ALLOC_DEBUG
407 /*
408 * Validation Routine to verify that the TRIM list maintained by the journal
409 * is in good shape relative to what we think the bitmap should have. We should
410 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
411 * we panic.
412 */
413 int trim_validate_bitmap (struct hfsmount *hfsmp);
414 int trim_validate_bitmap (struct hfsmount *hfsmp) {
415 u_int64_t blockno_offset;
416 u_int64_t numblocks;
417 int i;
418 int count;
419 u_int32_t startblk;
420 u_int32_t blks;
421 int err = 0;
422 uint32_t alloccount = 0;
423
424 if (hfsmp->jnl) {
425 struct journal *jnl = (struct journal*)hfsmp->jnl;
426 if (jnl->active_tr) {
427 struct jnl_trim_list *trim = &(jnl->active_tr->trim);
428 count = trim->extent_count;
429 for (i = 0; i < count; i++) {
430 blockno_offset = trim->extents[i].offset;
431 blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
432 blockno_offset = blockno_offset / hfsmp->blockSize;
433 numblocks = trim->extents[i].length / hfsmp->blockSize;
434
435 startblk = (u_int32_t)blockno_offset;
436 blks = (u_int32_t) numblocks;
437 err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
438
439 if (err == 0 && alloccount != 0) {
440 panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
441 }
442 }
443 }
444 }
445 return 0;
446 }
447
448 #endif
449
450
451 /*
452 ;________________________________________________________________________________
453 ;
454 ; Routine: hfs_unmap_free_extent
455 ;
456 ; Function: Make note of a range of allocation blocks that should be
457 ; unmapped (trimmed). That is, the given range of blocks no
458 ; longer have useful content, and the device can unmap the
459 ; previous contents. For example, a solid state disk may reuse
460 ; the underlying storage for other blocks.
461 ;
462 ; This routine is only supported for journaled volumes. The extent
463 ; being freed is passed to the journal code, and the extent will
464 ; be unmapped after the current transaction is written to disk.
465 ;
466 ; Input Arguments:
467 ; hfsmp - The volume containing the allocation blocks.
468 ; startingBlock - The first allocation block of the extent being freed.
469 ; numBlocks - The number of allocation blocks of the extent being freed.
470 ;________________________________________________________________________________
471 */
472 static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
473 {
474 u_int64_t offset;
475 u_int64_t length;
476 u_int64_t device_sz;
477 int err = 0;
478
479 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
480 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
481
482 if (ALLOC_DEBUG) {
483 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
484 panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
485 }
486 }
487
488 if (hfsmp->jnl != NULL) {
489 device_sz = hfsmp->hfs_logical_bytes;
490 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
491 length = (u_int64_t) numBlocks * hfsmp->blockSize;
492
493 /* Validate that the trim is in a valid range of bytes */
494 if ((offset >= device_sz) || ((offset + length) > device_sz)) {
495 printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
496 err = EINVAL;
497 }
498
499 if (err == 0) {
500 err = journal_trim_add_extent(hfsmp->jnl, offset, length);
501 if (err) {
502 printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
503 }
504 }
505 }
506
507 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
508 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
509 }
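/*
 * Example of the block-to-byte conversion above (illustrative numbers):
 * with blockSize = 4096 and hfsPlusIOPosOffset = 1024, allocation block 10
 * maps to device byte offset 10 * 4096 + 1024 = 41984, and an 8-block
 * extent has a length of 8 * 4096 = 32768 bytes.
 */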
510
511 /*
512 ;________________________________________________________________________________
513 ;
514 ; Routine: hfs_track_unmap_blocks
515 ;
516 ; Function: Make note of a range of allocation blocks that should be
517 ; unmapped (trimmed). That is, the given range of blocks no
518 ; longer have useful content, and the device can unmap the
519 ; previous contents. For example, a solid state disk may reuse
520 ; the underlying storage for other blocks.
521 ;
522 ; This routine is only supported for journaled volumes.
523 ;
524 ; *****NOTE*****:
525 ; This function should *NOT* be used when the volume is fully
526 ; mounted. This function is intended to support a bitmap iteration
527 ; at mount time to fully inform the SSD driver of the state of all
528 ; blocks, and assumes that there is no allocation/deallocation
529 ; interference during its iteration.
530 ;
531 ; Input Arguments:
532 ; hfsmp - The volume containing the allocation blocks.
533 ; offset - The first allocation block of the extent being freed.
534 ; numBlocks - The number of allocation blocks of the extent being freed.
535 ; list - The list of currently tracked trim ranges.
536 ;________________________________________________________________________________
537 */
538 static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
539 u_int32_t numBlocks, struct jnl_trim_list *list) {
540
541 u_int64_t offset;
542 u_int64_t length;
543 int error = 0;
544
545 if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) {
546 int extent_no = list->extent_count;
547 offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
548 length = (u_int64_t) numBlocks * hfsmp->blockSize;
549
550
551 list->extents[extent_no].offset = offset;
552 list->extents[extent_no].length = length;
553 list->extent_count++;
554 if (list->extent_count == list->allocated_count) {
555 error = hfs_issue_unmap (hfsmp, list);
556 }
557 }
558
559 return error;
560 }
561
562 /*
563 ;________________________________________________________________________________
564 ;
565 ; Routine: hfs_issue_unmap
566 ;
567 ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
568 ;
569 ; Input Arguments:
570 ; hfsmp - The volume containing the allocation blocks.
571 ; list - The list of currently tracked trim ranges.
572 ;________________________________________________________________________________
573 */
574
575 static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
576 {
577 dk_unmap_t unmap;
578 int error = 0;
579
580 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
581 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
582 }
583
584 if (list->extent_count > 0 && list->extents != NULL) {
585 bzero(&unmap, sizeof(unmap));
586 unmap.extents = list->extents;
587 unmap.extentsCount = list->extent_count;
588
589 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
590 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
591 }
592
593 #if CONFIG_PROTECT
594 /*
595 * If we have not yet completed the first scan through the bitmap, then
596 * optionally inform the block driver below us that this is an initialization
597 * TRIM scan, if it can deal with this information.
598 */
599 if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
600 unmap.options |= _DK_UNMAP_INITIALIZE;
601 }
602 #endif
603 /* Issue a TRIM and flush them out */
604 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
605
606 bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
607 bzero (&unmap, sizeof(unmap));
608 list->extent_count = 0;
609 }
610
611 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
612 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
613 }
614
615 return error;
616 }
617
618 /*
619 ;________________________________________________________________________________
620 ;
621 ; Routine: hfs_unmap_alloc_extent
622 ;
623 ; Function: Make note that a range of allocation blocks, some of
624 ; which may have previously been passed to hfs_unmap_free_extent,
625 ; is now in use on the volume. The given blocks will be removed
626 ; from any pending DKIOCUNMAP.
627 ;
628 ; Input Arguments:
629 ; hfsmp - The volume containing the allocation blocks.
630 ; startingBlock - The first allocation block of the extent being allocated.
631 ; numBlocks - The number of allocation blocks being allocated.
632 ;________________________________________________________________________________
633 */
634
635 static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
636 {
637 u_int64_t offset;
638 u_int64_t length;
639 int err = 0;
640
641 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
642 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
643
644 if (hfsmp->jnl != NULL) {
645 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
646 length = (u_int64_t) numBlocks * hfsmp->blockSize;
647
648 err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
649 if (err) {
650 printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
651 }
652 }
653
654 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
655 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
656 }
657
658
659 /*
660 ;________________________________________________________________________________
661 ;
662 ; Routine: hfs_trim_callback
663 ;
664 ; Function: This function is called when a transaction that freed extents
665 ; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
666 ; written to the on-disk journal. This routine will add those
667 ; extents to the free extent cache so that they can be reused.
668 ;
669 ; CAUTION: This routine is called while the journal's trim lock
670 ; is held shared, so that no other thread can reuse any portion
671 ; of those extents. We must be very careful about which locks
672 ; we take from within this callback, to avoid deadlock. The
673 ; call to add_free_extent_cache will end up taking the cache's
674 ; lock (just long enough to add these extents to the cache).
675 ;
676 ; CAUTION: If the journal becomes invalid (eg., due to an I/O
677 ; error when trying to write to the journal), this callback
678 ; will stop getting called, even if extents got freed before
679 ; the journal became invalid!
680 ;
681 ; Input Arguments:
682 ; arg - The hfsmount of the volume containing the extents.
683 ; extent_count - The number of extents freed in the transaction.
684 ; extents - An array of extents (byte ranges) that were freed.
685 ;________________________________________________________________________________
686 */
687
688 __private_extern__ void
689 hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
690 {
691 uint32_t i;
692 uint32_t startBlock, numBlocks;
693 struct hfsmount *hfsmp = arg;
694
695 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
696 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
697
698 for (i=0; i<extent_count; ++i) {
699 /* Convert the byte range in *extents back to a range of allocation blocks. */
700 startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
701 numBlocks = extents[i].length / hfsmp->blockSize;
702 (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
703 }
704
705 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
706 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
707 }
708
709
710 /*
711 ;________________________________________________________________________________
712 ;
713 ; Routine: CheckUnmappedBytes
714 ;
715 ; Function: From the specified inputs, determine if the extent in question overlaps
716 ; space that was recently freed, where the recently freed space may still be
717 ; lingering in an uncommitted journal transaction. This may have data safety
718 ; implications. The intended use is to decide whether or not to force a journal flush
719 ; before allowing file data I/O to be issued. If we did not do this
720 ; then it would be possible to issue the file I/O ahead of the
721 ; journal, resulting in data being overwritten if the transaction either
722 ; is not committed or cannot be replayed.
723 ;
724 ; NOTE: This function assumes that the journal and catalog/extent locks are held.
725 ;
726 ; Input Arguments:
727 ; hfsmp - The volume containing the allocation blocks.
728 ; blockno - start of the extent in question (in allocation blocks)
729 ; numblocks - number of allocation blocks in the extent
730 ; recently_freed - output pointer indicating whether or not the blocks were freed recently
731 ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks)
732 ;
733 ; Output:
734 ;
735 ; Returns 0 if we could determine extent validity for this (or a previous) transaction
736 ; Returns errno if there was an error
737 ;
738 ; If 0 is returned, then *recently_freed will contain a boolean indicating
739 ; whether the extent was recently freed.
740 ;________________________________________________________________________________
741 */
742
743 u_int32_t
744 CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
745 uint64_t device_offset;
746 uint64_t numbytes;
747 uint32_t err = 0;
748 uint64_t lba_overlap_end;
749
750 if (hfsmp->jnl != NULL) {
751 /*
752 * Convert the allocation block # and the number of blocks into device-relative
753 * offsets so that they can be compared using the TRIM list.
754 */
755 uint64_t device_sz = hfsmp->hfs_logical_bytes;
756 device_offset = blockno * ((uint64_t)hfsmp->blockSize);
757 device_offset += hfsmp->hfsPlusIOPosOffset;
758 numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
759
760 /*
761 * Since we check that the device_offset isn't too large, it's safe to subtract it
762 * from the size in the second check.
763 */
764 if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
765 return EINVAL;
766 }
767
768 /* Ask the journal if this extent overlaps with any pending TRIMs */
769 if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
770 *recently_freed = 1;
771
772 /* Convert lba_overlap_end back into allocation blocks */
773 uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
774 end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
775 *overlap_end = (uint32_t) end_offset;
776 }
777 else {
778 *recently_freed = 0;
779 }
780 err = 0;
781 }
782 else {
783 /* There may not be a journal. In that case, always return success. */
784 *recently_freed = 0;
785 }
786 return err;
787
788 }
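/*
 * Illustrative sketch (not compiled): how a caller might use
 * CheckUnmappedBytes to decide whether a journal flush is needed before
 * issuing file I/O against an extent. The flush routine named here is a
 * placeholder, not a real symbol; a real caller would use whatever
 * journal-flush primitive the mount provides.
 */
#if 0
	int recently_freed = 0;
	uint32_t overlap_end = 0;

	if (CheckUnmappedBytes(hfsmp, blockno, numblocks,
	                       &recently_freed, &overlap_end) == 0 &&
	    recently_freed) {
		/* Force the freeing transaction to disk before touching the blocks. */
		force_journal_flush(hfsmp); /* placeholder name */
	}
#endif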
789
790
791 /*
792 ;________________________________________________________________________________
793 ;
794 ; Routine: ScanUnmapBlocks
795 ;
796 ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
797 ; device as needed so that the underlying disk device is as
798 ; up-to-date as possible with which blocks are unmapped.
799 ; Additionally build up the summary table as needed.
800 ;
801 ; This function reads the bitmap in large blocks
802 ; (up to 1MB), unlike the runtime, which reads the bitmap
803 ; in 4K blocks. So if this function is being called
804 ; after the volume is mounted and actively modified, the
805 ; caller needs to invalidate all of the existing buffers
806 ; associated with the bitmap vnode before calling this
807 ; function. If the buffers are not invalidated, it can
808 ; cause buf_t collisions and potential data corruption.
809 ;
810 ; Input Arguments:
811 ; hfsmp - The volume containing the allocation blocks.
812 ;________________________________________________________________________________
813 */
814
815 __private_extern__
816 u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
817 {
818 u_int32_t blocks_scanned = 0;
819 int error = 0;
820 struct jnl_trim_list trimlist;
821
822 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
823 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
824 }
825
826 /*
827 *struct jnl_trim_list {
828 uint32_t allocated_count;
829 uint32_t extent_count;
830 dk_extent_t *extents;
831 };
832 */
833 bzero (&trimlist, sizeof(trimlist));
834
835 /*
836 * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
837 * which is now enabled for most architectures. Instead, any trim related
838 * work should be tied to whether the underlying storage media supports
839 * UNMAP, as any solid state device would on desktop or embedded.
840 *
841 * We do this because we may want to scan the full bitmap on desktop
842 * for spinning media for the purposes of building up the
843 * summary table.
844 *
845 * We also avoid sending TRIMs down to the underlying media if the mount is read-only.
846 */
847
848 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
849 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
850 /* If the underlying device supports unmap and the mount is read-write, initialize */
851 int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
852 void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
853 if (extents == NULL) {
854 return ENOMEM;
855 }
856 trimlist.extents = (dk_extent_t*)extents;
857 trimlist.allocated_count = alloc_count;
858 trimlist.extent_count = 0;
859 }
860
861 while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
862
863 error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
864
865 if (error) {
866 printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
867 break;
868 }
869 }
870
871 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
872 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
873 if (error == 0) {
874 hfs_issue_unmap(hfsmp, &trimlist);
875 }
876 if (trimlist.extents) {
877 kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
878 }
879 }
880
881 /*
882 * This is in an #if block because hfs_validate_summary prototype and function body
883 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
884 */
885 #if ALLOC_DEBUG
886 sanity_check_free_ext(hfsmp, 1);
887 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
888 /* Validate the summary table too! */
889 hfs_validate_summary(hfsmp);
890 printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
891 }
892 #endif
893
894 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
895 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
896 }
897
898 return error;
899 }
900
901 static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start,
902 uint32_t count, int list,
903 struct rl_entry **reservation)
904 {
905 struct rl_entry *range, *next_range;
906
907 if (list == HFS_TENTATIVE_BLOCKS) {
908 int nranges = 0;
909 // Don't allow more than 4 tentative reservations
910 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
911 rl_link, next_range) {
912 if (++nranges > 3)
913 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
914 }
915 }
916
917 MALLOC(range, struct rl_entry *, sizeof(*range), M_TEMP, M_WAITOK);
918 range->rl_start = start;
919 range->rl_end = start + count - 1;
920 TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
921 *reservation = range;
922 }
923
924 static void hfs_release_reserved(hfsmount_t *hfsmp,
925 struct rl_entry *range,
926 int list)
927 {
928 if (range->rl_start == -1)
929 return;
930
931 TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
932
933 if (rl_len(range) > 0) {
934 if (list == HFS_TENTATIVE_BLOCKS)
935 hfsmp->tentativeBlocks -= rl_len(range);
936 else {
937 /*
938 * We don't need to unmap tentative blocks because we won't have
939 * written to them, but we might have written to reserved blocks.
940 * Nothing can refer to those blocks so this doesn't have to be
941 * via the journal. If this proves to be too expensive, we could
942 * consider not sending down the unmap or we could require this
943 * to always be called within a transaction and then we can use
944 * the journal.
945 */
946 dk_extent_t extent = {
947 .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize)
948 + hfsmp->hfsPlusIOPosOffset),
949 .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize)
950 };
951 dk_unmap_t unmap = {
952 .extents = &extent,
953 .extentsCount = 1,
954 };
955 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap,
956 0, vfs_context_kernel());
957 assert(hfsmp->lockedBlocks >= rl_len(range));
958 hfsmp->lockedBlocks -= rl_len(range);
959 }
960 hfs_release_summary(hfsmp, range->rl_start, rl_len(range));
961 add_free_extent_cache(hfsmp, range->rl_start, rl_len(range));
962 }
963
964 range->rl_start = -1;
965 range->rl_end = -2;
966 }
967
968 static void hfs_free_locked_internal(hfsmount_t *hfsmp,
969 struct rl_entry **reservation,
970 int list)
971 {
972 if (*reservation) {
973 hfs_release_reserved(hfsmp, *reservation, list);
974 FREE(*reservation, M_TEMP);
975 *reservation = NULL;
976 }
977 }
978
979 void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
980 {
981 hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
982 }
983
984 void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
985 {
986 hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
987 }
988
989 OSErr BlockAllocate (
990 hfsmount_t *hfsmp, /* which volume to allocate space on */
991 u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
992 u_int32_t minBlocks, /* desired number of blocks to allocate */
993 u_int32_t maxBlocks, /* maximum number of blocks to allocate */
994 hfs_block_alloc_flags_t flags, /* option flags */
995 u_int32_t *actualStartBlock, /* actual first block of allocation */
996 u_int32_t *actualNumBlocks)
997 {
998 hfs_alloc_extra_args_t extra_args = {
999 .max_blocks = maxBlocks
1000 };
1001
1002 HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
1003
1004 OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
1005
1006 *actualStartBlock = extent.startBlock;
1007 *actualNumBlocks = extent.blockCount;
1008
1009 return err;
1010 }
1011
1012 errno_t hfs_block_alloc(hfsmount_t *hfsmp,
1013 HFSPlusExtentDescriptor *extent,
1014 hfs_block_alloc_flags_t flags,
1015 hfs_alloc_extra_args_t *ap)
1016 {
1017 return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
1018 }
1019
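/*
 * Illustrative sketch (not compiled): the tentative-reservation life cycle
 * described in the comment below. Block counts and error handling are
 * examples only; real callers hold the allocation lock and run inside
 * transactions.
 */
#if 0
	struct rl_entry *res = NULL;
	HFSPlusExtentDescriptor ext = { .startBlock = 0, .blockCount = 16 };
	hfs_alloc_extra_args_t args = { .max_blocks = 64, .reservation_out = &res };

	/* Reserve blocks without marking them allocated on disk. */
	errno_t err = hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_TENTATIVE, &args);

	if (err == 0 && res) {
		/* Later: consume the reservation (it may have been stolen). */
		hfs_alloc_extra_args_t use_args = { .reservation_in = &res };
		ext.blockCount = 16;
		err = hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_USE_TENTATIVE, &use_args);
	}
#endif
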
1020 /*
1021 ;________________________________________________________________________________
1022 ;
1023 ; Routine: hfs_block_alloc_int
1024 ;
1025 ; Function: Allocate space on a volume. If contiguous allocation is requested,
1026 ; at least the requested number of blocks will be allocated or an
1027 ; error will be returned. If contiguous allocation is not forced,
1028 ; the space will be allocated with the first largest extent available
1029 ; at the requested starting allocation block. If there is not enough
1030 ; room there, an allocation of less than the requested size will be
1031 ; returned.
1032 ;
1033 ; If the requested starting block is 0 (for new file allocations),
1034 ; the volume's allocation block pointer will be used as a starting
1035 ; point.
1036 ;
1037 ; Input Arguments:
1038 ; hfsmp - Pointer to the HFS mount structure.
1039 ; extent - startBlock indicates the block to start
1040 ; searching from and blockCount is the number of
1041 ; blocks required. Depending on the flags used,
1042 ; more or less blocks may be returned. The
1043 ; allocated extent is returned via this
1044 ; parameter.
1045 ; flags - Flags to specify options like contiguous, use
1046 ; metadata zone, skip free block check, etc.
1047 ; ap - Additional arguments used depending on flags.
1048 ; See hfs_alloc_extra_args_t and below.
1049 ;
1050 ; Output:
1051 ; (result) - Error code, zero for successful allocation
1052 ; extent - If successful, the allocated extent.
1053 ;
1054 ; Side effects:
1055 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1056 ;
1057 ; HFS_ALLOC_TENTATIVE
1058 ; Blocks will be reserved but not marked allocated. They can be
1059 ; stolen if free space is limited. Tentative blocks can be used by
1060 ; passing HFS_ALLOC_USE_TENTATIVE and passing in the reservation.
1061 ; @ap->reservation_out is used to store the reservation.
1062 ;
1063 ; HFS_ALLOC_USE_TENTATIVE
1064 ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
1065 ; @ap->reservation_in should be set to whatever @ap->reservation_out
1066 ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative
1067 ; reservation was stolen, a normal allocation will take place.
1068 ;
1069 ; HFS_ALLOC_LOCKED
1070 ; Blocks will be reserved but not marked allocated. Unlike tentative
1071 ; reservations they cannot be stolen. It is safe to write to these
1072 ; blocks. @ap->reservation_out is used to store the reservation.
1073 ;
1074 ; HFS_ALLOC_COMMIT
1075 ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
1076 ; mark them allocated on disk. @ap->reservation_in is used.
1077 ;
1078 ; HFS_ALLOC_ROLL_BACK
1079 ; Take blocks that were just recently deallocated and mark them
1080 ; allocated. This is for roll back situations. Blocks got
1081 ; deallocated and then something went wrong and we need to roll back
1082 ; by marking the blocks allocated.
1083 ;
1084 ; HFS_ALLOC_FORCECONTIG
1085 ; It will not return fewer than @min_blocks.
1086 ;
1087 ; HFS_ALLOC_TRY_HARD
1088 ; We will perform an exhaustive search to try and find @max_blocks.
1089 ; It will not return fewer than @min_blocks.
1090 ;
1091 ;________________________________________________________________________________
1092 */
1093 OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
1094 HFSPlusExtentDescriptor *extent,
1095 hfs_block_alloc_flags_t flags,
1096 hfs_alloc_extra_args_t *ap)
1097 {
1098 u_int32_t freeBlocks;
1099 OSErr err = 0;
1100 Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
1101 Boolean useMetaZone;
1102 Boolean forceContiguous = false;
1103 Boolean forceFlush;
1104
1105 uint32_t startingBlock = extent->startBlock;
1106 uint32_t minBlocks = extent->blockCount;
1107 uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;
1108
1109 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1110 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
1111
1112 if (ISSET(flags, HFS_ALLOC_COMMIT)) {
1113 extent->startBlock = (*ap->reservation_in)->rl_start;
1114 extent->blockCount = rl_len(*ap->reservation_in);
1115 goto mark_allocated;
1116 }
1117
1118 if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
1119 goto mark_allocated;
1120
1121 freeBlocks = hfs_freeblks(hfsmp, 0);
1122
1123 if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
1124 struct rl_entry *range = *ap->reservation_in;
1125
1126 if (range && range->rl_start != -1) {
1127 /*
1128 * It's possible that we have a tentative reservation
1129 * but there aren't enough free blocks due to loaned blocks
1130 * or insufficient space in the backing store.
1131 */
1132 uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks);
1133
1134 if (count >= minBlocks) {
1135 extent->startBlock = range->rl_start;
1136 extent->blockCount = count;
1137
1138 // Should we go straight to commit?
1139 if (!ISSET(flags, HFS_ALLOC_LOCKED))
1140 SET(flags, HFS_ALLOC_COMMIT);
1141
1142 goto mark_allocated;
1143 }
1144 }
1145
1146 /*
1147 * We can't use the tentative reservation so free it and allocate
1148 * normally.
1149 */
1150 hfs_free_tentative(hfsmp, ap->reservation_in);
1151 CLR(flags, HFS_ALLOC_USE_TENTATIVE);
1152 }
1153
1154 if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
1155 forceContiguous = true;
1156
1157 if (flags & HFS_ALLOC_METAZONE) {
1158 useMetaZone = true;
1159 } else {
1160 useMetaZone = false;
1161 }
1162
1163 if (flags & HFS_ALLOC_FLUSHTXN) {
1164 forceFlush = true;
1165 }
1166 else {
1167 forceFlush = false;
1168 }
1169
1170 assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);
1171
1172 // See if we have to steal tentative blocks
1173 if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
1174 SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);
1175
1176 /* Skip free block check if blocks are being allocated for relocating
1177 * data during truncating a volume.
1178 *
1179 * During hfs_truncatefs(), the volume free block count is updated
1180 * before relocating data to reflect the total number of free blocks
1181 * that will exist on the volume after resize is successful. This
1182 * means that we have reserved allocation blocks required for relocating
1183 * the data and hence there is no need to check the free blocks.
1184 * It will also prevent resize failure when the number of blocks in
1185 * an extent being relocated is more than the free blocks that will
1186 * exist after the volume is resized.
1187 */
1188 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1189 // If the disk is already full, don't bother.
1190 if (freeBlocks == 0) {
1191 err = dskFulErr;
1192 goto exit;
1193 }
1194 if (forceContiguous && freeBlocks < minBlocks) {
1195 err = dskFulErr;
1196 goto exit;
1197 }
1198
1199 /*
1200 * Clip if necessary so we don't over-subscribe the free blocks.
1201 */
1202 if (minBlocks > freeBlocks) {
1203 minBlocks = freeBlocks;
1204 }
1205 if (maxBlocks > freeBlocks) {
1206 maxBlocks = freeBlocks;
1207 }
1208 }
1209
1210 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
1211 err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
1212 if (err)
1213 goto exit;
1214
1215 goto mark_allocated;
1216 }
1217
1218 //
1219 // If caller didn't specify a starting block number, then use the volume's
1220 // next block to allocate from.
1221 //
1222 if (startingBlock == 0) {
1223 hfs_lock_mount (hfsmp);
1224
1225 /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
1226 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1227 startingBlock = hfsmp->sparseAllocation;
1228 }
1229 else {
1230 startingBlock = hfsmp->nextAllocation;
1231 }
1232 hfs_unlock_mount(hfsmp);
1233 updateAllocPtr = true;
1234 }
1235
1236
1237 if (startingBlock >= hfsmp->allocLimit) {
1238 startingBlock = 0; /* overflow so start at beginning */
1239 }
1240
1241 //
1242 // If the request must be contiguous, then find a sequence of free blocks
1243 // that is long enough. Otherwise, find the first free block.
1244 //
1245 if (forceContiguous) {
1246 err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
1247 flags, &extent->startBlock, &extent->blockCount);
1248 /*
1249 * If we allocated from a new position then also update the roving allocator.
1250 * This will keep the roving allocation pointer up-to-date even
1251 * if we are using the new R/B tree allocator, since
1252 * it doesn't matter to us here, how the underlying allocator found
1253 * the block to vend out.
1254 */
1255 if ((err == noErr) &&
1256 (extent->startBlock > startingBlock) &&
1257 ((extent->startBlock < hfsmp->hfs_metazone_start) ||
1258 (extent->startBlock > hfsmp->hfs_metazone_end))) {
1259 updateAllocPtr = true;
1260 }
1261 } else {
1262 /*
1263 * Scan the bitmap once, gather the N largest free extents, then
1264 * allocate from these largest extents. Repeat as needed until
1265 * we get all the space we needed. We could probably build up
1266 * that list when the higher level caller tried (and failed) a
1267 * contiguous allocation first.
1268 *
1269 * Note that the free-extent cache will cease to be updated if
1270 * we are using the red-black tree for allocations. If we jettison
1271 * the tree, then we will reset the free-extent cache and start over.
1272 */
1273
1274 /* Disable HFS_ALLOC_FLUSHTXN if needed */
1275 if (forceFlush) {
1276 flags &= ~HFS_ALLOC_FLUSHTXN;
1277 }
1278
1279 /*
1280 * BlockFindKnown only examines the free extent cache; anything in there will
1281 * have been committed to stable storage already.
1282 */
1283 err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
1284 &extent->blockCount);
1285
1286 /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */
1287
1288 if (err == dskFulErr) {
1289 /*
1290 * Now we have to do a bigger scan. Start at startingBlock and go up until the
1291 * allocation limit. We 'trust' the summary bitmap in this call, if it tells us
1292 * that it could not find any free space.
1293 */
1294 err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
1295 maxBlocks, flags, true,
1296 &extent->startBlock, &extent->blockCount);
1297 }
1298 if (err == dskFulErr) {
1299 /*
1300 * Vary the behavior here if the summary table is on or off.
1301 * If it is on, then we don't trust it if we get into this case and
1302 * basically do a full scan for maximum coverage.
1303 * If it is off, then we trust the above and go up until the startingBlock.
1304 */
1305 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1306 err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
1307 flags, false,
1308 &extent->startBlock, &extent->blockCount);
1309 }
1310 else {
1311 err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
1312 flags, false,
1313 &extent->startBlock, &extent->blockCount);
1314 }
1315
1316 /*
1317 * Last Resort: Find/use blocks that may require a journal flush.
1318 */
1319 if (err == dskFulErr && forceFlush) {
1320 flags |= HFS_ALLOC_FLUSHTXN;
1321 err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
1322 flags, false,
1323 &extent->startBlock, &extent->blockCount);
1324 }
1325 }
1326 }
1327
1328 if (err)
1329 goto exit;
1330
1331 mark_allocated:
1332
1333 // Handle alignment
1334 if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
1335 /*
1336 * See the comment in FileMgrInternal.h for alignment
1337 * semantics.
1338 */
1339 uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
1340 % ap->alignment);
1341
1342 // @minBlocks is still the minimum
1343 if (extent->blockCount >= minBlocks + rounding)
1344 extent->blockCount -= rounding;
1345 }
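	/*
	 * Worked example (illustrative): with alignment = 8,
	 * alignment_offset = 1 and blockCount = 14, rounding =
	 * (14 + 1) % 8 = 7; trimming the extent to 7 blocks makes
	 * (blockCount + alignment_offset) an exact multiple of 8,
	 * provided at least @minBlocks remain.
	 */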
1346
1347 err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
1348 extent->blockCount, flags);
1349
1350 if (err)
1351 goto exit;
1352
1353 if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0
1354 && !ISSET(flags, HFS_ALLOC_TENTATIVE)) {
1355 if (ISSET(flags, HFS_ALLOC_FAST_DEV)) {
1356 #if !HFS_ALLOC_TEST /* need this guard because this file is compiled outside of the kernel */
1357 hfs_pin_block_range(hfsmp, HFS_PIN_IT,
1358 extent->startBlock, extent->blockCount,
1359 vfs_context_kernel());
1360 #endif
1361 } else {
1362 _dk_cs_map_t cm = {
1363 .cm_extent = {
1364 (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize)
1365 + hfsmp->hfsPlusIOPosOffset),
1366 hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize)
1367 }
1368 };
1369
1370 errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP,
1371 (caddr_t)&cm, 0, vfs_context_current());
1372
1373 /*
1374 * Ignore errors for now; we are fully provisioned so in
1375 * theory CoreStorage should be able to handle this
1376 * allocation. Should we want to change this in future, then
1377 * we should think carefully how we handle errors. Allowing
1378 * CoreStorage to truncate our allocation is problematic
1379 * because we might have minimum and alignment requirements
1380 * and backing out changes we have already made is
1381 * non-trivial.
1382 */
1383
1384 if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) {
1385 printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n",
1386 err2, cm.cm_bytes_mapped);
1387 }
1388 }
1389 }
1390
1391 // if we actually allocated something then go update the
1392 // various bits of state that we maintain regardless of
1393 // whether there was an error (i.e. partial allocations
1394 // still need to update things like the free block count).
1395 //
1396 if (extent->blockCount != 0) {
1397 //
1398 // If we used the volume's roving allocation pointer, then we need to update it.
1399 // Adding in the length of the current allocation might reduce the next allocate
1400 // call by avoiding a re-scan of the already allocated space. However, the clump
1401 // just allocated can quite conceivably end up being truncated or released when
1402 // the file is closed or its EOF changed. Leaving the allocation pointer at the
1403 // start of the last allocation will avoid unnecessary fragmentation in this case.
1404 //
1405 hfs_lock_mount (hfsmp);
1406
1407 if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
1408 lck_spin_lock(&hfsmp->vcbFreeExtLock);
1409 if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
1410 hfsmp->sparseAllocation = extent->startBlock;
1411 }
1412 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
1413 if (extent->blockCount < hfsmp->hfs_freed_block_count) {
1414 hfsmp->hfs_freed_block_count -= extent->blockCount;
1415 } else {
1416 hfsmp->hfs_freed_block_count = 0;
1417 }
1418
1419 if (updateAllocPtr &&
1420 ((extent->startBlock < hfsmp->hfs_metazone_start) ||
1421 (extent->startBlock > hfsmp->hfs_metazone_end))) {
1422 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
1423 }
1424
1425 (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
1426 }
1427
1428 if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
1429 (*ap->reservation_in)->rl_start += extent->blockCount;
1430 hfsmp->tentativeBlocks -= extent->blockCount;
1431 if (rl_len(*ap->reservation_in) <= 0)
1432 hfs_free_tentative(hfsmp, ap->reservation_in);
1433 } else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
1434 // Handle committing locked extents
1435 assert(hfsmp->lockedBlocks >= extent->blockCount);
1436 (*ap->reservation_in)->rl_start += extent->blockCount;
1437 hfsmp->lockedBlocks -= extent->blockCount;
1438 hfs_free_locked(hfsmp, ap->reservation_in);
1439 }
1440
1441 /*
1442 * Update the number of free blocks on the volume
1443 *
1444 * Skip updating the free blocks count if the blocks are
1445 * being allocated to relocate data as part of hfs_truncatefs()
1446 */
1447
1448 if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
1449 hfsmp->tentativeBlocks += extent->blockCount;
1450 } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
1451 hfsmp->lockedBlocks += extent->blockCount;
1452 } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1453 hfsmp->freeBlocks -= extent->blockCount;
1454 }
1455 MarkVCBDirty(hfsmp);
1456 hfs_unlock_mount(hfsmp);
1457
1458 hfs_generate_volume_notifications(hfsmp);
1459
1460 if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
1461 add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
1462 0, ap->reservation_out);
1463 } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
1464 add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
1465 1, ap->reservation_out);
1466 }
1467
1468 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
1469 /*
1470 * See if we used tentative blocks. Note that we cannot
1471 * free the reservations here because we don't have access
1472 * to the external pointers. All we can do is update the
1473 * reservations and they'll be cleaned up when whatever is
1474 * holding the pointers calls us back.
1475 *
1476 * We use the rangelist code to detect overlaps and
1477 * constrain the tentative block allocation. Note that
1478 * @end is inclusive so that our rangelist code will
1479 * resolve the various cases for us. As a result, we need
1480 * to ensure that we account for it properly when removing
1481 * the blocks from the tentative count in the mount point
1482 * and re-inserting the remainder (either head or tail)
1483 */
1484 struct rl_entry *range, *next_range;
1485 struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
1486 const uint32_t start = extent->startBlock;
1487 const uint32_t end = start + extent->blockCount - 1;
1488 TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
1489 switch (rl_overlap(range, start, end)) {
1490 case RL_OVERLAPCONTAINSRANGE:
1491 // Keep the bigger part
1492 if (start - range->rl_start > range->rl_end - end) {
1493 // Discard the tail
1494 hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
1495 hfs_release_summary(hfsmp, end + 1, range->rl_end - end);
1496 const uint32_t old_end = range->rl_end;
1497 range->rl_end = start - 1;
1498 add_free_extent_cache(hfsmp, end + 1, old_end - end);
1499 } else {
1500 // Discard the head
1501 hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
1502 hfs_release_summary(hfsmp, range->rl_start,
1503 start - range->rl_start);
1504 const uint32_t old_start = range->rl_start;
1505 range->rl_start = end + 1;
1506 add_free_extent_cache(hfsmp, old_start,
1507 start - old_start);
1508 }
1509 assert(range->rl_end >= range->rl_start);
1510 break;
1511 case RL_MATCHINGOVERLAP:
1512 case RL_OVERLAPISCONTAINED:
1513 hfsmp->tentativeBlocks -= rl_len(range);
1514 range->rl_end = range->rl_start - 1;
1515 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
1516 break;
1517 case RL_OVERLAPSTARTSBEFORE:
1518 hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
1519 range->rl_end = start - 1;
1520 assert(range->rl_end >= range->rl_start);
1521 break;
1522 case RL_OVERLAPENDSAFTER:
1523 hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
1524 range->rl_start = end + 1;
1525 assert(range->rl_end >= range->rl_start);
1526 break;
1527 case RL_NOOVERLAP:
1528 break;
1529 }
1530 }
1531 }
1532 }
1533
1534 exit:
1535
1536 if (ALLOC_DEBUG) {
1537 if (err == noErr) {
1538 if (extent->startBlock >= hfsmp->totalBlocks) {
1539 panic ("BlockAllocate: vending invalid blocks!");
1540 }
1541 if (extent->startBlock >= hfsmp->allocLimit) {
1542 panic ("BlockAllocate: vending block past allocLimit!");
1543 }
1544
1545 if ((extent->startBlock + extent->blockCount) > hfsmp->totalBlocks) {
1546 panic ("BlockAllocate: vending too many invalid blocks!");
1547 }
1548
1549 if ((extent->startBlock + extent->blockCount) > hfsmp->allocLimit) {
1550 panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
1551 }
1552 }
1553 }
1554
1555 if (err) {
1556 // Just to be safe...
1557 extent->startBlock = 0;
1558 extent->blockCount = 0;
1559 }
1560
1561 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1562 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0);
1563
1564 return err;
1565 }
1566
1567
1568 /*
1569 ;________________________________________________________________________________
1570 ;
1571 ; Routine: BlockDeallocate
1572 ;
1573 ; Function: Update the bitmap to deallocate a run of disk allocation blocks
1574 ;
1575 ; Input Arguments:
1576 ; vcb - Pointer to ExtendedVCB for the volume to free space on
1577 ; firstBlock - First allocation block to be freed
1578 ; numBlocks - Number of allocation blocks to free up (must be > 0!)
1579 ;
1580 ; Output:
1581 ; (result) - Result code
1582 ;
1583 ; Side effects:
1584 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1585 ; The Allocator's red-black trees may also be modified as a result.
1586 ;
1587 ;________________________________________________________________________________
1588 */
1589
1590 OSErr BlockDeallocate (
1591 ExtendedVCB *vcb, // Which volume to deallocate space on
1592 u_int32_t firstBlock, // First block in range to deallocate
1593 u_int32_t numBlocks, // Number of contiguous blocks to deallocate
1594 hfs_block_alloc_flags_t flags)
1595 {
1596 if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
1597 return 0;
1598
1599 OSErr err;
1600 struct hfsmount *hfsmp;
1601 hfsmp = VCBTOHFS(vcb);
1602
1603 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1604 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
1605
1606 //
1607 // If no blocks to deallocate, then exit early
1608 //
1609 if (numBlocks == 0) {
1610 err = noErr;
1611 goto Exit;
1612 }
1613
1614
1615 if (ALLOC_DEBUG) {
1616 if (firstBlock >= hfsmp->totalBlocks) {
1617 panic ("BlockDeallocate: freeing invalid blocks!");
1618 }
1619
1620 if ((firstBlock + numBlocks) > hfsmp->totalBlocks) {
1621 panic ("BlockDeallocate: freeing too many invalid blocks!");
1622 }
1623 }
1624
1625 /*
1626 * If we're using the summary bitmap, then try to mark the bits
1627 * as potentially usable/free before actually deallocating them.
1628 * It is better to be slightly speculative here for correctness.
1629 */
1630
1631 (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
1632
1633 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1634
1635 if (err) {
1636 goto Exit;
1637 }
1638
1639 //
1640 // Update the volume's free block count, and mark the VCB as dirty.
1641 //
1642 hfs_lock_mount(hfsmp);
1643 /*
1644 * Skip updating the free block count when HFS_ALLOC_SKIPFREEBLKS is
1645 * set; this flag is specified when a volume is being truncated.
1646 */
1647 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1648 vcb->freeBlocks += numBlocks;
1649 }
1650
1651 vcb->hfs_freed_block_count += numBlocks;
1652
1653 if (vcb->nextAllocation == (firstBlock + numBlocks)) {
1654 HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
1655 }
1656
1657 if (hfsmp->jnl == NULL) {
1658 /*
1659 * In the journal case, we'll add the free extent once the journal
1660 * calls us back to tell us it wrote the transaction to disk.
1661 */
1662 (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
1663
1664 /*
1665 * In the journal case, we'll only update sparseAllocation once the
1666 * free extent cache becomes empty (when we remove the last entry
1667 * from the cache). Skipping it here means we're less likely to
1668 * find a recently freed extent via the bitmap before it gets added
1669 * to the free extent cache.
1670 */
1671 if (firstBlock < vcb->sparseAllocation) {
1672 vcb->sparseAllocation = firstBlock;
1673 }
1674 }
1675
1676 MarkVCBDirty(vcb);
1677 hfs_unlock_mount(hfsmp);
1678
1679 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1680 Exit:
1681
1682 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1683 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
1684
1685 return err;
1686 }
1687
1688
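/*
 * freebitcount[n] is the number of zero (free) bits in the 4-bit value n;
 * it lets MetaZoneFreeBlocks() below count free blocks a nibble at a time.
 */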
1689 u_int8_t freebitcount[16] = {
1690 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
1691 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
1692 };
1693
1694 u_int32_t
1695 MetaZoneFreeBlocks(ExtendedVCB *vcb)
1696 {
1697 u_int32_t freeblocks;
1698 u_int32_t *currCache;
1699 uintptr_t blockRef;
1700 u_int32_t bit;
1701 u_int32_t lastbit;
1702 int bytesleft;
1703 int bytesperblock;
1704 u_int8_t byte;
1705 u_int8_t *buffer;
1706
1707 blockRef = 0;
1708 bytesleft = freeblocks = 0;
1709 buffer = NULL;
1710 bit = VCBTOHFS(vcb)->hfs_metazone_start;
1711 if (bit == 1)
1712 bit = 0;
1713
1714 lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1715 bytesperblock = vcb->vcbVBMIOSize;
1716
1717 /*
1718 * Count all the free bits from bit to lastbit.
1719 */
1720 while (bit < lastbit) {
1721 /*
1722 * Get next bitmap block.
1723 */
1724 if (bytesleft == 0) {
1725 if (blockRef) {
1726 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1727 blockRef = 0;
1728 }
1729 if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef,
1730 HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
1731 return (0);
1732 }
1733 buffer = (u_int8_t *)currCache;
1734 bytesleft = bytesperblock;
1735 }
1736 byte = *buffer++;
1737 freeblocks += freebitcount[byte & 0x0F];
1738 freeblocks += freebitcount[(byte >> 4) & 0x0F];
1739 bit += kBitsPerByte;
1740 --bytesleft;
1741 }
1742 if (blockRef)
1743 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1744
1745 return (freeblocks);
1746 }
1747
1748
1749 /*
1750 * Obtain the next allocation block (bit) that's
1751 * outside the metadata allocation zone.
1752 */
1753 static u_int32_t NextBitmapBlock(
1754 ExtendedVCB *vcb,
1755 u_int32_t bit)
1756 {
1757 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1758
1759 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1760 return (bit);
1761 /*
1762 * Skip over metadata allocation zone.
1763 */
1764 if ((bit >= hfsmp->hfs_metazone_start) &&
1765 (bit <= hfsmp->hfs_metazone_end)) {
1766 bit = hfsmp->hfs_metazone_end + 1;
1767 }
1768 return (bit);
1769 }
1770
1771
1772 // Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes.
1773 static void bits_set(void *bitmap, int start, int end)
1774 {
1775 const int start_bit = start & 63;
1776 const int end_bit = end & 63;
1777
1778 #define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
1779 #define RIGHT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)
1780
1781 uint64_t *p = (uint64_t *)bitmap + start / 64;
1782
1783 if ((start & ~63) == (end & ~63)) {
1784 // Start and end in same 64 bits
1785 *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
1786 } else {
1787 *p++ |= RIGHT_MASK(start_bit);
1788
1789 int nquads = (end - end_bit - start - 1) / 64;
1790
1791 while (nquads--)
1792 *p++ = 0xffffffffffffffffull;
1793
1794 if (end_bit)
1795 *p |= LEFT_MASK(end_bit);
1796 }
1797 }
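/*
 * For illustration, a minimal (compiled-out) use of bits_set: bit 0 is the
 * most significant bit of byte 0, and @end is exclusive, so setting bits
 * [4, 12) on a zeroed bitmap yields bytes 0x0F 0xF0 0x00 ... regardless of
 * host endianness, because the masks are swapped to big-endian first.
 */
#if 0
	uint64_t bitmap[1] = { 0 };
	bits_set(bitmap, 4, 12);
	assert(((uint8_t *)bitmap)[0] == 0x0f);
	assert(((uint8_t *)bitmap)[1] == 0xf0);
#endif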
1798
1799 // Modifies the buffer (possibly taking a shadow copy) so that any reserved ranges read as allocated
1800 static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset,
1801 hfs_block_alloc_flags_t flags,
1802 bool always_copy)
1803 {
1804 bool taken_copy = false;
1805 void *buffer = (void *)buf_dataptr(bp);
1806 const uint32_t nbytes = buf_count(bp);
1807 const off_t end = offset + nbytes * 8 - 1;
1808
1809 for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
1810 ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
1811 struct rl_entry *entry;
1812 TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
1813 uint32_t a, b;
1814
1815 enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);
1816
1817 if (overlap_type == RL_NOOVERLAP)
1818 continue;
1819
1820 /*
1821 * If always_copy is false, we only take a copy if B_LOCKED is
1822 * set because ReleaseScanBitmapRange doesn't invalidate the
1823 * buffer in that case.
1824 */
1825 if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
1826 buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
1827 buf_brelse(bp);
1828 bp = new_bp;
1829 buf_setflags(bp, B_NOCACHE);
1830 buffer = (void *)buf_dataptr(bp);
1831 taken_copy = true;
1832 }
1833
1834 switch (overlap_type) {
1835 case RL_OVERLAPCONTAINSRANGE:
1836 case RL_MATCHINGOVERLAP:
1837 memset(buffer, 0xff, nbytes);
1838 return bp;
1839 case RL_OVERLAPISCONTAINED:
1840 a = entry->rl_start;
1841 b = entry->rl_end;
1842 break;
1843 case RL_OVERLAPSTARTSBEFORE:
1844 a = offset;
1845 b = entry->rl_end;
1846 break;
1847 case RL_OVERLAPENDSAFTER:
1848 a = entry->rl_start;
1849 b = end;
1850 break;
1851 case RL_NOOVERLAP:
1852 __builtin_unreachable();
1853 }
1854
1855 a -= offset;
1856 b -= offset;
1857
1858 assert(a < buf_count(bp) * 8);
1859 assert(b < buf_count(bp) * 8);
1860 assert(b >= a);
1861
1862 // b is inclusive
1863 bits_set(buffer, a, b + 1);
1864 }
1865 } // end loop over the reserved range lists
1866
1867 return bp;
1868 }
1869
1870 /*
1871 ;_______________________________________________________________________
1872 ;
1873 ; Routine: ReadBitmapBlock
1874 ;
1875 ; Function: Read in a bitmap block corresponding to a given allocation
1876 ; block (bit). Return a pointer to the bitmap block.
1877 ;
1878 ; Inputs:
1879 ; vcb -- Pointer to ExtendedVCB
1880 ; bit -- Allocation block whose bitmap block is desired
1881 ;
1882 ; Outputs:
1883 ; buffer -- Pointer to bitmap block corresponding to "block"
1884 ; blockRef
1885 ;_______________________________________________________________________
1886 */
1887 static OSErr ReadBitmapBlock(ExtendedVCB *vcb,
1888 u_int32_t bit,
1889 u_int32_t **buffer,
1890 uintptr_t *blockRef,
1891 hfs_block_alloc_flags_t flags)
1892 {
1893 OSErr err;
1894 struct buf *bp = NULL;
1895 struct vnode *vp = NULL;
1896 daddr64_t block;
1897 u_int32_t blockSize;
1898
1899 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1900 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
1901
1902 /*
1903 * volume bitmap blocks are protected by the allocation file lock
1904 */
1905 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1906
1907 blockSize = (u_int32_t)vcb->vcbVBMIOSize;
1908 block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
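	/*
	 * For example, with a hypothetical 4 KiB vcbVBMIOSize, each bitmap
	 * block covers 32768 allocation blocks, so bit 100000 lives in
	 * bitmap block 100000 / 32768 = 3.
	 */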
1909
1910 /* HFS+ / HFSX */
1911 if (vcb->vcbSigWord != kHFSSigWord) {
1912 vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
1913 }
1914 #if CONFIG_HFS_STD
1915 else {
1916 /* HFS Standard */
1917 vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */
1918 block += vcb->vcbVBMSt; /* map to physical block */
1919 }
1920 #endif
1921
1922 err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
1923
1924 if (bp) {
1925 if (err) {
1926 buf_brelse(bp);
1927 *blockRef = 0;
1928 *buffer = NULL;
1929 } else {
1930 if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
1931 bp = process_reservations(vcb, bp, block * blockSize * 8,
1932 flags, /* always_copy: */ true);
1933 }
1934
1935 buf_setfsprivate(bp, (void *)(uintptr_t)flags);
1936
1937 *blockRef = (uintptr_t)bp;
1938 *buffer = (u_int32_t *)buf_dataptr(bp);
1939 }
1940 }
1941
1942 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1943 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
1944
1945 return err;
1946 }
1947
1948
1949 /*
1950 ;_______________________________________________________________________
1951 ;
1952 ; Routine: ReadBitmapRange
1953 ;
1954 ; Function: Read in a range of the bitmap starting at the given offset.
1955 ; Use the supplied size to determine the amount of I/O to generate
1956 ; against the bitmap file. Return a pointer to the bitmap block.
1957 ;
1958 ; Inputs:
1959 ; hfsmp -- Pointer to hfs mount
1960 ; offset -- byte offset into the bitmap file
1961 ; size -- How much I/O to generate against the bitmap file.
1962 ;
1963 ; Outputs:
1964 ; buffer -- Pointer to bitmap block data corresponding to "block"
1965 ; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call.
1966 ;_______________________________________________________________________
1967 */
1968 static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
1969 uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
1970 {
1971
1972 OSErr err;
1973 struct buf *bp = NULL;
1974 struct vnode *vp = NULL;
1975 daddr64_t block;
1976
1977 /* This function isn't supported for HFS standard */
1978 if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
1979 return EINVAL;
1980 }
1981
1982 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1983 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
1984 }
1985
1986 /*
1987 * volume bitmap blocks are protected by the allocation file lock
1988 */
1989 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
1990
1991 vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */
1992
1993 /*
1994 * The byte offset argument must be converted into bitmap-relative logical
1995 * block numbers before using it in buf_meta_bread.
1996 *
1997 * buf_meta_bread (and the things it calls) will eventually try to
1998 * reconstruct the byte offset into the file by multiplying the logical
1999 * block number passed in below by the vcbVBMIOSize field in the mount
2000 * point. So we prepare for that by converting the byte offset back into
2001 * logical blocks in terms of VBMIOSize units.
2002 *
2003 * The amount of I/O requested and the byte offset should be computed
2004 * based on the helper function in the frame that called us, so we can
2005 * get away with just doing a simple divide here.
2006 */
2007 block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
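	/*
	 * For example, with a hypothetical vcbVBMIOSize of 4096 bytes, a byte
	 * offset of 1048576 maps to logical block 1048576 / 4096 = 256, and
	 * buf_meta_bread will reconstruct the byte offset as 256 * 4096.
	 */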
2008
2009 err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
2010
2011 if (bp) {
2012 if (err) {
2013 buf_brelse(bp);
2014 *blockRef = 0;
2015 *buffer = NULL;
2016 } else {
2017 bp = process_reservations(hfsmp, bp, (offset * 8), 0,
2018 /* always_copy: */ false);
2019
2020 *blockRef = bp;
2021 *buffer = (u_int32_t *)buf_dataptr(bp);
2022 }
2023 }
2024
2025 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2026 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
2027 }
2028
2029 return err;
2030 }
2031
2032
2033 /*
2034 ;_______________________________________________________________________
2035 ;
2036 ; Routine: ReleaseBitmapBlock
2037 ;
2038 ; Function: Release a bitmap block.
2039 ;
2040 ; Inputs:
2041 ; vcb
2042 ; blockRef
2043 ; dirty
2044 ;_______________________________________________________________________
2045 */
2046 static OSErr ReleaseBitmapBlock(
2047 ExtendedVCB *vcb,
2048 uintptr_t blockRef,
2049 Boolean dirty)
2050 {
2051 struct buf *bp = (struct buf *)blockRef;
2052
2053 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2054 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
2055
2056 if (blockRef == 0) {
2057 if (dirty)
2058 panic("hfs: ReleaseBitmapBlock: missing bp");
2059 return (0);
2060 }
2061
2062 if (bp) {
2063 if (dirty) {
2064 hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp);
2065
2066 if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
2067 panic("Modified read-only bitmap buffer!");
2068
2069 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2070
2071 if (hfsmp->jnl) {
2072 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
2073 } else {
2074 buf_bdwrite(bp);
2075 }
2076 } else {
2077 buf_brelse(bp);
2078 }
2079 }
2080
2081 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2082 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
2083
2084 return (0);
2085 }
2086
2087 /*
2088 * ReleaseScanBitmapRange
2089 *
2090 * This is used to release struct bufs that were created for use by
2091 * bitmap scanning code. Because they may be of different sizes than those
2092 * used by the typical runtime manipulation code, we want to force them to be purged out
2093 * of the buffer cache ASAP, so we'll release them differently than in the
2094 * ReleaseBitmapBlock case.
2095 *
2096 * Additionally, because we know that we're only reading the blocks and that they
2097 * should have been clean prior to reading them, we will never
2098 * issue a write to them (thus dirtying them).
2099 */
2100
2101 static OSErr ReleaseScanBitmapRange(struct buf *bp) {
2102
2103 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2104 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_START, 0, 0, 0, 0, 0);
2105 }
2106
2107 if (bp) {
2108 /* Mark the buffer invalid if it isn't locked, then release it */
2109 if ((buf_flags(bp) & B_LOCKED) == 0) {
2110 buf_markinvalid(bp);
2111 }
2112 buf_brelse(bp);
2113 }
2114
2115 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
2116 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2117 }
2118
2119 return (0);
2120 }
2121
2122 /*
2123 * @extent.startBlock, on input, contains a preferred block for the
2124 * allocation. @extent.blockCount, on input, contains the minimum
2125 * number of blocks acceptable. Upon success, the result is conveyed
2126 * in @extent.
2127 */
2128 static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
2129 HFSPlusExtentDescriptor *extent,
2130 uint32_t max_blocks,
2131 hfs_block_alloc_flags_t flags)
2132 {
2133 OSErr err = dskFulErr;
2134
2135 const uint32_t min_blocks = extent->blockCount;
2136
2137 // It's > rather than >= because the last block is always reserved
2138 if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
2139 && hfsmp->allocLimit - extent->startBlock > max_blocks) {
2140 /*
2141 * This is just checking to see if there's an extent starting
2142 * at extent->startBlock that will suit. We only check for
2143 * @max_blocks here; @min_blocks is ignored.
2144 */
2145
2146 err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
2147 max_blocks, max_blocks, true, true,
2148 &extent->startBlock, &extent->blockCount, flags);
2149
2150 if (err != dskFulErr)
2151 return err;
2152 }
2153
2154 err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
2155 &extent->blockCount);
2156
2157 if (!err) {
2158 if (extent->blockCount >= max_blocks)
2159 return 0;
2160 } else if (err != dskFulErr)
2161 return err;
2162
2163 // Try a more exhaustive search
2164 return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
2165 min_blocks, max_blocks,
2166 /* useMetaZone: */ true,
2167 /* trustSummary: */ true,
2168 &extent->startBlock, &extent->blockCount, flags);
2169 }
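/*
 * A usage sketch of the contract above (hypothetical caller, compiled out;
 * the flags value of 0 and the surrounding locking are assumptions):
 */
#if 0
static OSErr example_alloc_try_hard(hfsmount_t *hfsmp)
{
	HFSPlusExtentDescriptor extent;
	OSErr err;

	extent.startBlock = 1000;	/* preferred start; a hint only */
	extent.blockCount = 8;		/* minimum acceptable run length */

	/* Ask for up to 64 contiguous blocks. */
	err = hfs_alloc_try_hard(hfsmp, &extent, 64, 0);
	if (err == dskFulErr)
		return err;		/* no run of at least 8 blocks found */

	/* On success, extent.startBlock/blockCount describe the run found. */
	return err;
}
#endif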
2170
2171 /*
2172 _______________________________________________________________________
2173
2174 Routine: BlockFindContig
2175
2176 Function: Find a contiguous group of allocation blocks. If the
2177 minimum cannot be satisfied, nothing is returned. The
2178 caller guarantees that there are enough free blocks
2179 (though they may not be contiguous, in which case this
2180 call will fail).
2181
2182 Inputs:
2183 vcb Pointer to volume where space is to be allocated
2184 startingBlock Preferred first block for allocation
2185 minBlocks Minimum number of contiguous blocks to allocate
2186 maxBlocks Maximum number of contiguous blocks to allocate
2187 flags
2188
2189 Outputs:
2190 actualStartBlock First block of range allocated, or 0 if error
2191 actualNumBlocks Number of blocks allocated, or 0 if error
2192 _______________________________________________________________________
2193 */
2194 static OSErr BlockFindContig(
2195 ExtendedVCB *vcb,
2196 u_int32_t startingBlock,
2197 u_int32_t minBlocks,
2198 u_int32_t maxBlocks,
2199 hfs_block_alloc_flags_t flags,
2200 u_int32_t *actualStartBlock,
2201 u_int32_t *actualNumBlocks)
2202 {
2203 OSErr retval = noErr;
2204 uint32_t currentStart = startingBlock;
2205
2206 uint32_t foundStart = 0; // values to emit to caller
2207 uint32_t foundCount = 0;
2208
2209 uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this
2210 uint32_t collision_count = 0;
2211
2212 int err;
2213 int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
2214 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
2215
2216 int recently_deleted = 0;
2217 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2218
2219 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2220 KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
2221
2222 while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
2223
2224 /* Try and find something that works. */
2225
2226 /*
2227 * NOTE: If the only contiguous free extent of at least minBlocks
2228 * crosses startingBlock (i.e. starts before, ends after), then we
2229 * won't find it. Earlier versions *did* find this case by letting
2230 * the second search look past startingBlock by minBlocks. But
2231 * with the free extent cache, this can lead to duplicate entries
2232 * in the cache, causing the same blocks to be allocated twice.
2233 */
2234 retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
2235 maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);
2236
2237 if (retval == dskFulErr && currentStart != 0) {
2238 /*
2239 * We constrain the endingBlock so we don't bother looking for ranges
2240 * that would overlap those found in the previous call, if the summary bitmap
2241 * is not on for this volume. If it is on, then we assume that it was not
2242 * trustworthy and do a full scan.
2243 */
2244 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2245 retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
2246 maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
2247 }
2248 else {
2249 retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
2250 maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
2251 }
2252 }
2253
2254 if (retval != noErr) {
2255 goto bailout;
2256 }
2257
2258 /* Do we overlap with the recently found collision extent? */
2259 if (collision_start) {
2260 if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
2261 /*
2262 * We've looped around, and the only thing we could use was the collision extent.
2263 * Since we are allowed to use it, go ahead and do so now.
2264 */
2265 if(allowReuse) {
2266 /*
2267 * then we couldn't find anything except values which might have been
2268 * recently deallocated. just return our cached value if we are allowed to.
2269 */
2270 foundStart = collision_start;
2271 foundCount = collision_count;
2272 goto bailout;
2273 }
2274 else {
2275 /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
2276 retval = dskFulErr;
2277 goto bailout;
2278 }
2279 }
2280 }
2281
2282 /* OK, we know we must not have collided. See if this one is recently deleted */
2283 if (hfsmp->jnl) {
2284 recently_deleted = 0;
2285 uint32_t nextStart;
2286 err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
2287 (uint64_t) foundCount, &recently_deleted, &nextStart);
2288 if (err == 0) {
2289 if(recently_deleted != 0) {
2290 /*
2291 * these blocks were recently deleted/deallocated. Cache the extent,
2292 * but keep searching to see if we can find one that won't collide here.
2293 */
2294 if (collision_start == 0) {
2295 collision_start = foundStart;
2296 collision_count = foundCount;
2297 }
2298 recently_deleted = 0;
2299
2300 /*
2301 * advance currentStart to the point just past the overlap we just found. Note that
2302 * we will automatically loop around to start of the bitmap as needed.
2303 */
2304 currentStart = nextStart;
2305 /* Unset foundStart/Count to allow it to loop around again. */
2306 foundStart = 0;
2307 foundCount = 0;
2308 }
2309 }
2310 } // end jnl/deleted case
2311
2312 /*
2313 * If we found something good, we'd break out of the loop at the top; foundCount
2314 * and foundStart should be set.
2315 */
2316
2317 } // end while loop.
2318
2319 bailout:
2320
2321 if (retval == noErr) {
2322 *actualStartBlock = foundStart;
2323 *actualNumBlocks = foundCount;
2324 }
2325
2326 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2327 KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0);
2328
2329 return retval;
2330
2331 }
2332
2333
2334 /*
2335 _______________________________________________________________________
2336
2337 Routine: BlockFindAny
2338
2339 Function: Find one or more free allocation blocks; may return fewer than
2340 requested. The caller guarantees that there is at least one
2341 free block.
2342
2343 Inputs:
2344 vcb Pointer to volume where space is to be allocated
2345 startingBlock Preferred first block for allocation
2346 endingBlock Last block to check + 1
2347 maxBlocks Maximum number of contiguous blocks to allocate
2348 useMetaZone
2349
2350 Outputs:
2351 actualStartBlock First block of range allocated, or 0 if error
2352 actualNumBlocks Number of blocks allocated, or 0 if error
2353 _______________________________________________________________________
2354 */
2355
2356 static OSErr BlockFindAny(
2357 ExtendedVCB *vcb,
2358 u_int32_t startingBlock,
2359 register u_int32_t endingBlock,
2360 u_int32_t maxBlocks,
2361 hfs_block_alloc_flags_t flags,
2362 Boolean trustSummary,
2363 u_int32_t *actualStartBlock,
2364 u_int32_t *actualNumBlocks)
2365 {
2366
2367 /*
2368 * If it is enabled, scan through the summary table to find the first free block.
2369 *
2370 * If it reports that there are not any free blocks, we could have a false
2371 * positive, so in that case, use the input arguments as a pass through.
2372 */
2373 uint32_t start_blk = startingBlock;
2374 uint32_t end_blk = endingBlock;
2375 struct hfsmount *hfsmp;
2376 OSErr err;
2377
2378 hfsmp = (struct hfsmount*)vcb;
2379 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2380 uint32_t suggested_start;
2381
2382 /*
2383 * If the summary table is enabled, scan through it to find the first free
2384 * block. If there was an error, or we couldn't find anything free in the
2385 * summary table, then just leave the start_blk fields unmodified. We wouldn't
2386 * have gotten to this point if the mount point made it look like there was possibly
2387 * free space in the FS.
2388 */
2389 err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
2390 if (err == 0) {
2391 start_blk = suggested_start;
2392 }
2393 else {
2394 /* Differentiate between ENOSPC and a more esoteric error in the above call. */
2395 if ((err == ENOSPC) && (trustSummary)) {
2396 /*
2397 * The 'trustSummary' argument is for doing a full scan if we really,
2398 * really need the space and we think it's somewhere but can't find it in the
2399 * summary table. If it's true, then we trust the summary table and return
2400 * dskFulErr if we couldn't find it above.
2401 */
2402 return dskFulErr;
2403 }
2404 /*
2405 * If either trustSummary was false or we got a different errno, then we
2406 * want to fall through to the real bitmap single i/o code...
2407 */
2408 }
2409 }
2410
2411 err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
2412 flags, actualStartBlock, actualNumBlocks);
2413
2414 return err;
2415 }
2416
2417
2418 /*
2419 * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
2420 * figure out where the free allocation blocks are. Inputs and
2421 * outputs are the same as for BlockFindAny.
2422 */
2423
2424 static OSErr BlockFindAnyBitmap(
2425 ExtendedVCB *vcb,
2426 u_int32_t startingBlock,
2427 register u_int32_t endingBlock,
2428 u_int32_t maxBlocks,
2429 hfs_block_alloc_flags_t flags,
2430 u_int32_t *actualStartBlock,
2431 u_int32_t *actualNumBlocks)
2432 {
2433 OSErr err;
2434 register u_int32_t block = 0; // current block number
2435 register u_int32_t currentWord; // Pointer to current word within bitmap block
2436 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2437 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2438 u_int32_t *buffer = NULL;
2439 u_int32_t *currCache = NULL;
2440 uintptr_t blockRef = 0;
2441 u_int32_t bitsPerBlock;
2442 u_int32_t wordsPerBlock;
2443 Boolean dirty = false;
2444 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2445 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
2446 Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);
2447
2448 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2449 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
2450
2451 restartSearchAny:
2452
2453 /*
2454 * When we're skipping the metadata zone and the start/end
2455 * range overlaps with the metadata zone then adjust the
2456 * start to be outside of the metadata zone. If the range
2457 * is entirely inside the metadata zone then we can deny the
2458 * request (dskFulErr).
2459 */
2460 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
2461 if (startingBlock <= vcb->hfs_metazone_end) {
2462 if (endingBlock > (vcb->hfs_metazone_end + 2))
2463 startingBlock = vcb->hfs_metazone_end + 1;
2464 else {
2465 err = dskFulErr;
2466 goto Exit;
2467 }
2468 }
2469 }
2470
2471 // Since this routine doesn't wrap around
2472 if (maxBlocks > (endingBlock - startingBlock)) {
2473 maxBlocks = endingBlock - startingBlock;
2474 }
2475
2476 //
2477 // Pre-read the first bitmap block
2478 //
2479 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
2480 if (err != noErr) goto Exit;
2481 buffer = currCache;
2482
2483 //
2484 // Set up the current position within the block
2485 //
2486 {
2487 u_int32_t wordIndexInBlock;
2488
2489 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2490 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2491
2492 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2493 buffer += wordIndexInBlock;
2494 wordsLeft = wordsPerBlock - wordIndexInBlock;
2495 currentWord = SWAP_BE32 (*buffer);
2496 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
2497 }
2498
2499 /*
2500 * While loop 1:
2501 * Find the first unallocated block starting at 'block'
2502 */
2503 uint32_t summary_block_scan = 0;
2504
2505 block=startingBlock;
2506 while (block < endingBlock) {
2507 if ((currentWord & bitMask) == 0)
2508 break;
2509
2510 // Next bit
2511 ++block;
2512 bitMask >>= 1;
2513 if (bitMask == 0) {
2514 // Next word
2515 bitMask = kHighBitInWordMask;
2516 ++buffer;
2517
2518 if (--wordsLeft == 0) {
2519 // Next block
2520 buffer = currCache = NULL;
2521 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2522 /*
2523 * If summary_block_scan is non-zero, then we must have
2524 * pulled a bitmap file block into core, and scanned through
2525 * the entire thing. Because we're in this loop, we are
2526 * implicitly trusting that the summary table didn't have any knowledge
2527 * about this particular block. As a result, update the summary table
2528 * (lazily, now that we've scanned it) with our findings that
2529 * this particular block is completely used up.
2530 */
2531 if (summary_block_scan != 0) {
2532 uint32_t summary_bit;
2533 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
2534 hfs_set_summary (hfsmp, summary_bit, 1);
2535 summary_block_scan = 0;
2536 }
2537 }
2538
2539 err = ReleaseBitmapBlock(vcb, blockRef, false);
2540 if (err != noErr) goto Exit;
2541
2542 /*
2543 * Skip over metadata blocks.
2544 */
2545 if (!useMetaZone) {
2546 block = NextBitmapBlock(vcb, block);
2547 }
2548 if (block >= endingBlock) {
2549 err = dskFulErr;
2550 goto Exit;
2551 }
2552
2553 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
2554 if (err != noErr) goto Exit;
2555 buffer = currCache;
2556 summary_block_scan = block;
2557 wordsLeft = wordsPerBlock;
2558 }
2559 currentWord = SWAP_BE32 (*buffer);
2560 }
2561 }
2562
2563 // Did we get to the end of the bitmap before finding a free block?
2564 // If so, then couldn't allocate anything.
2565 if (block >= endingBlock) {
2566 err = dskFulErr;
2567 goto Exit;
2568 }
2569
2570
2571 /*
2572 * Don't move forward just yet. Verify that either one of the following
2573 * two conditions is true:
2574 * 1) journaling is not enabled
2575 * 2) block is not currently on any pending TRIM list.
2576 */
2577 if (hfsmp->jnl != NULL && (forceFlush == false)) {
2578 int recently_deleted = 0;
2579 uint32_t nextblk;
2580 err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
2581 if ((err == 0) && (recently_deleted)) {
2582
2583 /* release the bitmap block & unset currCache. we may jump past it. */
2584 err = ReleaseBitmapBlock(vcb, blockRef, false);
2585 currCache = NULL;
2586 if (err != noErr) {
2587 goto Exit;
2588 }
2589 /* set our start to nextblk, and re-do the search. */
2590 startingBlock = nextblk;
2591 goto restartSearchAny;
2592 }
2593 }
2594
2595
2596 // Return the first block in the allocated range
2597 *actualStartBlock = block;
2598 dirty = true;
2599
2600 // If we could get the desired number of blocks before hitting endingBlock,
2601 // then adjust endingBlock so we won't keep looking. Ideally, the comparison
2602 // would be (block + maxBlocks) < endingBlock, but that could overflow. The
2603 // comparison below yields identical results, but without overflow.
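	// For instance, if endingBlock were 0xFFFFFFF0 and maxBlocks 0x20,
	// (block + maxBlocks) could wrap past 2^32, whereas
	// (endingBlock - maxBlocks) cannot underflow because maxBlocks was
	// clamped to (endingBlock - startingBlock) above.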
2604 if (block < (endingBlock-maxBlocks)) {
2605 endingBlock = block + maxBlocks; // if we get this far, we've found enough
2606 }
2607
2608 /*
2609 * While loop 2:
2610 * Scan the bitmap, starting at 'currentWord' in the current
2611 * bitmap block. Continue iterating through the bitmap until
2612 * either we hit an allocated block, or until we have accumulated
2613 * maxBlocks worth of bitmap.
2614 */
2615
2616 /* Continue until we see an allocated block */
2617 while ((currentWord & bitMask) == 0) {
2618 // Move to the next block. If no more, then exit.
2619 ++block;
2620 if (block == endingBlock) {
2621 break;
2622 }
2623
2624 // Next bit
2625 bitMask >>= 1;
2626 if (bitMask == 0) {
2627 // Next word
2628 bitMask = kHighBitInWordMask;
2629 ++buffer;
2630
2631 if (--wordsLeft == 0) {
2632 // Next block
2633 buffer = currCache = NULL;
2634
2635 /* We're only reading the bitmap here, so mark it as clean */
2636 err = ReleaseBitmapBlock(vcb, blockRef, false);
2637 if (err != noErr) {
2638 goto Exit;
2639 }
2640
2641 /*
2642 * Skip over metadata blocks.
2643 */
2644 if (!useMetaZone) {
2645 u_int32_t nextBlock;
2646 nextBlock = NextBitmapBlock(vcb, block);
2647 if (nextBlock != block) {
2648 goto Exit; /* allocation gap, so stop */
2649 }
2650 }
2651
2652 if (block >= endingBlock) {
2653 goto Exit;
2654 }
2655
2656 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
2657 if (err != noErr) {
2658 goto Exit;
2659 }
2660 buffer = currCache;
2661 wordsLeft = wordsPerBlock;
2662 }
2663 currentWord = SWAP_BE32 (*buffer);
2664 }
2665 }
2666
2667 Exit:
2668 if (currCache) {
2669 /* Release the bitmap reference prior to marking bits in-use */
2670 (void) ReleaseBitmapBlock(vcb, blockRef, false);
2671 currCache = NULL;
2672 }
2673
2674 if (err == noErr) {
2675 *actualNumBlocks = block - *actualStartBlock;
2676
2677 // sanity check
2678 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
2679 panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
2680 }
2681 }
2682 else {
2683 *actualStartBlock = 0;
2684 *actualNumBlocks = 0;
2685 }
2686
2687 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2688 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2689
2690 return err;
2691 }
2692
2693
2694 /*
2695 _______________________________________________________________________
2696
2697 Routine: BlockFindKnown
2698
2699 Function: Return a potential extent from the free extent cache. The
2700 returned extent *must* be marked allocated and removed
2701 from the cache by the *caller*.
2702
2703 Inputs:
2704 vcb Pointer to volume where space is to be allocated
2705 maxBlocks Maximum number of contiguous blocks to allocate
2706
2707 Outputs:
2708 actualStartBlock First block of range allocated, or 0 if error
2709 actualNumBlocks Number of blocks allocated, or 0 if error
2710
2711 Returns:
2712 dskFulErr Free extent cache is empty
2713 _______________________________________________________________________
2714 */
2715
2716 static OSErr BlockFindKnown(
2717 ExtendedVCB *vcb,
2718 u_int32_t maxBlocks,
2719 u_int32_t *actualStartBlock,
2720 u_int32_t *actualNumBlocks)
2721 {
2722 OSErr err;
2723 u_int32_t foundBlocks;
2724 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2725
2726 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2727 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
2728
2729 hfs_lock_mount (hfsmp);
2730 lck_spin_lock(&vcb->vcbFreeExtLock);
2731 if ( vcb->vcbFreeExtCnt == 0 ||
2732 vcb->vcbFreeExt[0].blockCount == 0) {
2733 lck_spin_unlock(&vcb->vcbFreeExtLock);
2734 hfs_unlock_mount(hfsmp);
2735 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2736 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
2737 return dskFulErr;
2738 }
2739 lck_spin_unlock(&vcb->vcbFreeExtLock);
2740 hfs_unlock_mount(hfsmp);
2741
2742 lck_spin_lock(&vcb->vcbFreeExtLock);
2743
2744 // Just grab up to maxBlocks of the first (largest) free extent.
2745 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2746 foundBlocks = vcb->vcbFreeExt[0].blockCount;
2747 if (foundBlocks > maxBlocks)
2748 foundBlocks = maxBlocks;
2749 *actualNumBlocks = foundBlocks;
2750
2751 lck_spin_unlock(&vcb->vcbFreeExtLock);
2752
2753 // sanity check
2754 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
2755 {
2756 printf ("hfs: BlockFindKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2757 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2758 err = EIO;
2759 } else
2760 err = 0;
2761
2762 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2763 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2764
2765 return err;
2766 }
2767
2768 /*
2769 * BlockMarkAllocated
2770 *
2771 * This is a wrapper function around the internal calls which will actually mark the blocks
2772 * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
2773 * this logic here to avoid callers having to deal with whether or not the red-black tree
2774 * is enabled.
2775 */
2776
2777 OSErr BlockMarkAllocated(
2778 ExtendedVCB *vcb,
2779 u_int32_t startingBlock,
2780 register u_int32_t numBlocks)
2781 {
2782 struct hfsmount *hfsmp;
2783
2784 hfsmp = VCBTOHFS(vcb);
2785
2786 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
2787
2788 }
2789
2790
2791 /*
2792 _______________________________________________________________________
2793
2794 Routine: BlockMarkAllocatedInternal
2795
2796 Function: Mark a contiguous group of blocks as allocated (set in the
2797 bitmap). It assumes those bits are currently marked
2798 deallocated (clear in the bitmap). Note that this function
2799 must be called regardless of whether or not the bitmap or
2800 tree-based allocator is used, as all allocations must correctly
2801 be marked on-disk. If the tree-based approach is running, then
2802 this will be done before the node is removed from the tree.
2803
2804 Inputs:
2805 vcb Pointer to volume where space is to be allocated
2806 startingBlock First block number to mark as allocated
2807 numBlocks Number of blocks to mark as allocated
2808 _______________________________________________________________________
2809 */
2810 static
2811 OSErr BlockMarkAllocatedInternal (
2812 ExtendedVCB *vcb,
2813 u_int32_t startingBlock,
2814 u_int32_t numBlocks,
2815 hfs_block_alloc_flags_t flags)
2816 {
2817 OSErr err;
2818 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2819 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2820 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2821 u_int32_t firstBit; // Bit index within word of first bit to allocate
2822 u_int32_t numBits; // Number of bits in word to allocate
2823 u_int32_t *buffer = NULL;
2824 uintptr_t blockRef = 0;
2825 u_int32_t bitsPerBlock;
2826 u_int32_t wordsPerBlock;
2827 // XXXdbg
2828 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2829
2830 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2831 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0);
2832
2833 #if DEBUG
2834
2835 struct rl_entry *range;
2836 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
2837 assert(rl_overlap(range, startingBlock,
2838 startingBlock + numBlocks - 1) == RL_NOOVERLAP);
2839 }
2840
2841 #endif
2842
2843 int force_flush = 0;
2844 /*
2845 * Since we are about to mark these bits as in-use
2846 * in the bitmap, decide if we need to request an immediate
2847 * journal flush. It's safe to
2848 * poke at the journal pointer here since we MUST have
2849 * called start_transaction by the time this function is invoked.
2850 * If the journal is enabled, then it will have taken the requisite
2851 * journal locks. If it is not enabled, then we have taken
2852 * a shared lock on the global lock.
2853 */
2854 if (hfsmp->jnl) {
2855 uint32_t ignore;
2856 err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
2857 if ((err == 0) && (force_flush)) {
2858 journal_request_immediate_flush (hfsmp->jnl);
2859 }
2860 }
2861
2862 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
2863
2864 /*
2865 * Don't make changes to the disk if we're just reserving. Note that
2866 * we could do better in the tentative case because we could, in theory,
2867 * avoid the journal flush above. However, that would mean that we would
2868 * need to catch the callback to stop it incorrectly adding the extent
2869 * to our free cache.
2870 */
2871 if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
2872 err = 0;
2873 goto Exit;
2874 }
2875
2876 //
2877 // Pre-read the bitmap block containing the first word of allocation
2878 //
2879
2880 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2881 HFS_ALLOC_IGNORE_RESERVED);
2882 if (err != noErr) goto Exit;
2883 //
2884 // Initialize currentWord, and wordsLeft.
2885 //
2886 {
2887 u_int32_t wordIndexInBlock;
2888
2889 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2890 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2891
2892 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2893 currentWord = buffer + wordIndexInBlock;
2894 wordsLeft = wordsPerBlock - wordIndexInBlock;
2895 }
2896
2897 // XXXdbg
2898 if (hfsmp->jnl) {
2899 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2900 }
2901
2902 //
2903 // If the first block to allocate doesn't start on a word
2904 // boundary in the bitmap, then treat that first word
2905 // specially.
2906 //
2907
2908 firstBit = startingBlock % kBitsPerWord;
2909 if (firstBit != 0) {
2910 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2911 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2912 if (numBits > numBlocks) {
2913 numBits = numBlocks; // entire allocation is inside this one word
2914 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2915 }
2916 #if DEBUG
2917 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2918 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2919 }
2920 #endif
2921 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2922 numBlocks -= numBits; // adjust number of blocks left to allocate
2923
2924 ++currentWord; // move to next word
2925 --wordsLeft; // one less word left in this block
2926 }
2927
2928 //
2929 // Allocate whole words (32 blocks) at a time.
2930 //
2931
2932 bitMask = kAllBitsSetInWord; // put this in a register for 68K
2933 while (numBlocks >= kBitsPerWord) {
2934 if (wordsLeft == 0) {
2935 // Read in the next bitmap block
2936 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2937
2938 buffer = NULL;
2939 err = ReleaseBitmapBlock(vcb, blockRef, true);
2940 if (err != noErr) goto Exit;
2941
2942 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2943 HFS_ALLOC_IGNORE_RESERVED);
2944 if (err != noErr) goto Exit;
2945
2946 // XXXdbg
2947 if (hfsmp->jnl) {
2948 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2949 }
2950
2951 // Readjust currentWord and wordsLeft
2952 currentWord = buffer;
2953 wordsLeft = wordsPerBlock;
2954 }
2955 #if DEBUG
2956 if (*currentWord != 0) {
2957 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2958 }
2959 #endif
2960 *currentWord = SWAP_BE32 (bitMask);
2961 numBlocks -= kBitsPerWord;
2962
2963 ++currentWord; // move to next word
2964 --wordsLeft; // one less word left in this block
2965 }
2966
2967 //
2968 // Allocate any remaining blocks.
2969 //
2970
2971 if (numBlocks != 0) {
2972 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2973 if (wordsLeft == 0) {
2974 // Read in the next bitmap block
2975 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2976
2977 buffer = NULL;
2978 err = ReleaseBitmapBlock(vcb, blockRef, true);
2979 if (err != noErr) goto Exit;
2980
2981 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2982 HFS_ALLOC_IGNORE_RESERVED);
2983 if (err != noErr) goto Exit;
2984
2985 // XXXdbg
2986 if (hfsmp->jnl) {
2987 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2988 }
2989
2990 // Readjust currentWord and wordsLeft
2991 currentWord = buffer;
2992 wordsLeft = wordsPerBlock;
2993 }
2994 #if DEBUG
2995 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2996 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
2997 }
2998 #endif
2999 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
3000
3001 // No need to update currentWord or wordsLeft
3002 }
3003
3004 Exit:
3005
3006 if (buffer)
3007 (void)ReleaseBitmapBlock(vcb, blockRef, true);
3008
3009 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3010 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
3011
3012 return err;
3013 }
3014
3015
3016 /*
3017 * BlockMarkFree
3018 *
3019 * This is a wrapper function around the internal calls which will actually mark the blocks
3020 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
3021 * this logic here to avoid callers having to deal with whether or not the red-black tree
3022 * is enabled.
3023 *
3024 */
3025 OSErr BlockMarkFree(
3026 ExtendedVCB *vcb,
3027 u_int32_t startingBlock,
3028 register u_int32_t numBlocks)
3029 {
3030 struct hfsmount *hfsmp;
3031 hfsmp = VCBTOHFS(vcb);
3032
3033 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
3034 }
3035
3036
3037 /*
3038 * BlockMarkFreeUnused
3039 *
3040 * Scan the bitmap block beyond end of current file system for bits
3041 * that are marked as used. If any of the bits are marked as used,
3042 * this function marks them free.
3043 *
3044 * Note: This was specifically written to mark all bits beyond
3045 * end of current file system during hfs_extendfs(), which makes
3046 * sure that all the new blocks added to the file system are
3047 * marked as free. We expect that all the blocks beyond end of
3048 * current file system are always marked as free, but there might
3049 * be cases where are marked as used. This function assumes that
3050 * the number of blocks marked as used incorrectly are relatively
3051 * small, otherwise this can overflow journal transaction size
3052 * on certain file system configurations (example, large unused
3053 * bitmap with relatively small journal).
3054 *
3055 * Input:
3056 * startingBlock: First block of the range to mark unused
3057 * numBlocks: Number of blocks in the range to mark unused
3058 *
3059 * Returns: zero on success, non-zero on error.
3060 */
3061 OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
3062 {
3063 int error = 0;
3064 struct hfsmount *hfsmp = VCBTOHFS(vcb);
3065 u_int32_t curNumBlocks;
3066 u_int32_t bitsPerBlock;
3067 u_int32_t lastBit;
3068
3069 /* Use the optimal bitmap I/O size instead of bitmap block size */
3070 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3071
3072 /*
3073 * First clear any bits that are not aligned to a bitmap block boundary.
3074 *
3075 * Calculate the first bit in the bitmap block next to
3076 * the bitmap block containing the bit for startingBlock.
3077 * Using this value, we calculate the total number of
3078 * bits to be marked unused from startingBlock to the
3079 * end of bitmap block containing startingBlock.
3080 */
3081 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
3082 curNumBlocks = lastBit - startingBlock;
3083 if (curNumBlocks > numBlocks) {
3084 curNumBlocks = numBlocks;
3085 }
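	/*
	 * For example, with a hypothetical vcbVBMIOSize of 4096 bytes,
	 * bitsPerBlock is 32768; a startingBlock of 40000 rounds up to
	 * lastBit = 65536, so this first pass covers the 25536 unaligned
	 * bits (clamped to numBlocks when the range is smaller).
	 */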
3086 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
3087 if (error) {
3088 return error;
3089 }
3090 startingBlock += curNumBlocks;
3091 numBlocks -= curNumBlocks;
3092
3093 /*
3094 * Check a full bitmap block for any 'used' bit. If any bit is used,
3095 * mark only the bits in that bitmap block as free. This ensures
3096 * that we do not write unmodified bitmap blocks and do not
3097 * overwhelm the journal.
3098 *
3099 * The code checks one full bitmap block at a time, and marks an
3100 * entire bitmap block as free only if some bit in that bitmap
3101 * block is marked as used. Finally, it handles the last bitmap
3102 * block, which might be partially full, by checking only up to the
3103 * caller-specified last bit and, if any bit is set, marking only
3104 * that range as free.
3105 */
3106 while (numBlocks) {
3107 if (numBlocks >= bitsPerBlock) {
3108 curNumBlocks = bitsPerBlock;
3109 } else {
3110 curNumBlocks = numBlocks;
3111 }
3112 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
3113 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
3114 if (error) {
3115 return error;
3116 }
3117 }
3118 startingBlock += curNumBlocks;
3119 numBlocks -= curNumBlocks;
3120 }
3121
3122 return error;
3123 }
3124
3125 /*
3126 _______________________________________________________________________
3127
3128 Routine: BlockMarkFreeInternal
3129
3130 Function: Mark a contiguous group of blocks as free (clear in the
3131 bitmap). It assumes those bits are currently marked
3132 allocated (set in the bitmap).
3133
3134 Inputs:
3135 vcb Pointer to volume where space is to be freed
3136 startingBlock First block number to mark as freed
3137 numBlocks Number of blocks to mark as freed
3138 do_validate If true, validate the blocks being deallocated:
3139 check that they are within totalBlocks for the
3140 current volume and that they were allocated
3141 before being marked free.
3142 _______________________________________________________________________
3143 */
3144 static
3145 OSErr BlockMarkFreeInternal(
3146 ExtendedVCB *vcb,
3147 u_int32_t startingBlock_in,
3148 register u_int32_t numBlocks_in,
3149 Boolean do_validate)
3150 {
3151 OSErr err;
3152 u_int32_t startingBlock = startingBlock_in;
3153 u_int32_t numBlocks = numBlocks_in;
3154 uint32_t unmapStart = startingBlock_in;
3155 uint32_t unmapCount = numBlocks_in;
3156 uint32_t wordIndexInBlock;
3157 u_int32_t *currentWord; // Pointer to current word within bitmap block
3158 u_int32_t wordsLeft; // Number of words left in this bitmap block
3159 u_int32_t bitMask; // Word with given bits already set (ready to OR in)
3160 u_int32_t currentBit; // Bit index within word of current bit to allocate
3161 u_int32_t numBits; // Number of bits in word to allocate
3162 u_int32_t *buffer = NULL;
3163 uintptr_t blockRef = 0;
3164 u_int32_t bitsPerBlock;
3165 u_int32_t wordsPerBlock;
3166 // XXXdbg
3167 struct hfsmount *hfsmp = VCBTOHFS(vcb);
3168
3169 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3170 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
3171
3172 /*
3173 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
3174 * need to be able to free blocks being relocated during hfs_truncatefs.
3175 */
3176 if ((do_validate == true) &&
3177 (startingBlock + numBlocks > vcb->totalBlocks)) {
3178 #if ALLOC_DEBUG || DEBUG
3179 panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
3180 __builtin_unreachable();
3181 #else
3182 printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
3183 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
3184 err = EIO;
3185 goto Exit;
3186 #endif
3187 }
3188
3189 //
3190 // Pre-read the bitmap block containing the first word of allocation
3191 //
3192
3193 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
3194 HFS_ALLOC_IGNORE_RESERVED);
3195 if (err != noErr) goto Exit;
3196 // XXXdbg
3197 if (hfsmp->jnl) {
3198 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
3199 }
3200
3201 uint32_t min_unmap = 0, max_unmap = UINT32_MAX;
3202
3203 // Work out the bounds of any unmap we can send down
3204 struct rl_entry *range;
3205 for (int i = 0; i < 2; ++i) {
3206 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
3207 if (range->rl_start < startingBlock
3208 && range->rl_end >= min_unmap) {
3209 min_unmap = range->rl_end + 1;
3210 }
3211 if (range->rl_end >= startingBlock + numBlocks
3212 && range->rl_start < max_unmap) {
3213 max_unmap = range->rl_start;
3214 }
3215 }
3216 }
3217
3218 //
3219 // Figure out how many bits and words per bitmap block.
3220 //
3221 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
3222 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
3223 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3224
3225 //
3226 // Look for a range of free blocks immediately before startingBlock
3227 // (up to the start of the current bitmap block). Set unmapStart to
3228 // the first free block.
3229 //
3230 currentWord = buffer + wordIndexInBlock;
3231 currentBit = startingBlock % kBitsPerWord;
3232 bitMask = kHighBitInWordMask >> currentBit;
3233 while (unmapStart > min_unmap) {
3234 // Move currentWord/bitMask back by one bit
3235 bitMask <<= 1;
3236 if (bitMask == 0) {
3237 if (--currentWord < buffer)
3238 break;
3239 bitMask = kLowBitInWordMask;
3240 }
3241
3242 if (*currentWord & SWAP_BE32(bitMask))
3243 break; // Found an allocated block. Stop searching.
3244 --unmapStart;
3245 ++unmapCount;
3246 }
3247
3248 //
3249 // If the first block to free doesn't start on a word
3250 // boundary in the bitmap, then treat that first word
3251 // specially.
3252 //
3253
3254 currentWord = buffer + wordIndexInBlock;
3255 wordsLeft = wordsPerBlock - wordIndexInBlock;
3256 currentBit = startingBlock % kBitsPerWord;
3257 if (currentBit != 0) {
3258 bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit
3259 numBits = kBitsPerWord - currentBit; // number of remaining bits in this word
3260 if (numBits > numBlocks) {
3261 numBits = numBlocks; // entire allocation is inside this one word
3262 bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last
3263 }
3264 if ((do_validate == true) &&
3265 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
3266 goto Corruption;
3267 }
3268 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
3269 numBlocks -= numBits; // adjust number of blocks left to free
3270
3271 ++currentWord; // move to next word
3272 --wordsLeft; // one less word left in this block
3273 }
3274
3275 //
3276 // Free whole words (32 blocks) at a time.
3277 //
3278
3279 while (numBlocks >= kBitsPerWord) {
3280 if (wordsLeft == 0) {
3281 // Read in the next bitmap block
3282 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
3283
3284 buffer = NULL;
3285 err = ReleaseBitmapBlock(vcb, blockRef, true);
3286 if (err != noErr) goto Exit;
3287
3288 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
3289 HFS_ALLOC_IGNORE_RESERVED);
3290 if (err != noErr) goto Exit;
3291
3292 // XXXdbg
3293 if (hfsmp->jnl) {
3294 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
3295 }
3296
3297 // Readjust currentWord and wordsLeft
3298 currentWord = buffer;
3299 wordsLeft = wordsPerBlock;
3300 }
3301 if ((do_validate == true) &&
3302 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
3303 goto Corruption;
3304 }
3305 *currentWord = 0; // clear the entire word
3306 numBlocks -= kBitsPerWord;
3307
3308 ++currentWord; // move to next word
3309 --wordsLeft; // one less word left in this block
3310 }
3311
3312 //
3313 // Free any remaining blocks.
3314 //
3315
3316 if (numBlocks != 0) {
3317 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
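// e.g. numBlocks = 5: ~(0xFFFFFFFF >> 5) = 0xF8000000 -- the five highest bits set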
3318 if (wordsLeft == 0) {
3319 // Read in the next bitmap block
3320 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
3321
3322 buffer = NULL;
3323 err = ReleaseBitmapBlock(vcb, blockRef, true);
3324 if (err != noErr) goto Exit;
3325
3326 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
3327 HFS_ALLOC_IGNORE_RESERVED);
3328 if (err != noErr) goto Exit;
3329
3330 // XXXdbg
3331 if (hfsmp->jnl) {
3332 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
3333 }
3334
3335 // Readjust currentWord and wordsLeft
3336 currentWord = buffer;
3337 wordsLeft = wordsPerBlock;
3338 }
3339 if ((do_validate == true) &&
3340 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
3341 goto Corruption;
3342 }
3343 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
3344
3345 // No need to update currentWord or wordsLeft
3346 }
3347
3348 //
3349 // Look for a range of free blocks immediately after the range we just freed
3350 // (up to the end of the current bitmap block).
3351 //
3352 wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
3353 wordsLeft = wordsPerBlock - wordIndexInBlock;
3354 currentWord = buffer + wordIndexInBlock;
3355 currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
3356 bitMask = kHighBitInWordMask >> currentBit;
3357 while (unmapStart + unmapCount < max_unmap) {
3358 // Move currentWord/bitMask/wordsLeft forward one bit
3359 bitMask >>= 1;
3360 if (bitMask == 0) {
3361 if (--wordsLeft == 0)
3362 break;
3363 ++currentWord;
3364 bitMask = kHighBitInWordMask;
3365 }
3366
3367 if (*currentWord & SWAP_BE32(bitMask))
3368 break; // Found an allocated block. Stop searching.
3369 ++unmapCount;
3370 }
3371
3372 Exit:
3373
3374 if (buffer)
3375 (void)ReleaseBitmapBlock(vcb, blockRef, true);
3376
3377 if (err == noErr) {
3378 hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
3379 }
3380
3381 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3382 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
3383
3384 return err;
3385
3386 Corruption:
3387 #if DEBUG
3388 panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
3389 __builtin_unreachable();
3390 #else
3391 printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
3392 vcb->vcbVN, startingBlock_in, numBlocks_in);
3393 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
3394 err = EIO;
3395 goto Exit;
3396 #endif
3397 }
3398
3399
3400 /*
3401 _______________________________________________________________________
3402
3403 Routine: BlockFindContiguous
3404
3405 Function: Find a contiguous range of blocks that are free (bits
3406 clear in the bitmap). If a contiguous range of the
3407 minimum size can't be found, an error will be returned.
3408 This is only needed to support the bitmap-scanning logic,
3409 as the red-black tree should be able to do this by internally
3410 searching its tree.
3411
3412 Inputs:
3413 vcb Pointer to volume where space is to be allocated
3414 startingBlock Preferred first block of range
3415 endingBlock Last possible block in range + 1
3416 minBlocks Minimum number of blocks needed. Must be > 0.
3417 maxBlocks Maximum (ideal) number of blocks desired
3418 useMetaZone OK to dip into metadata allocation zone
3419
3420 Outputs:
3421 actualStartBlock First block of range found, or 0 if error
3422 actualNumBlocks Number of blocks found, or 0 if error
3423
3424 Returns:
3425 noErr Found at least minBlocks contiguous
3426 dskFulErr No contiguous space found, or all less than minBlocks
3427 _______________________________________________________________________
3428 */
3429
3430 static OSErr BlockFindContiguous(
3431 ExtendedVCB *vcb,
3432 u_int32_t startingBlock,
3433 u_int32_t endingBlock,
3434 u_int32_t minBlocks,
3435 u_int32_t maxBlocks,
3436 Boolean useMetaZone,
3437 Boolean trustSummary,
3438 u_int32_t *actualStartBlock,
3439 u_int32_t *actualNumBlocks,
3440 hfs_block_alloc_flags_t flags)
3441 {
3442 OSErr err;
3443 register u_int32_t currentBlock; // Block we're currently looking at.
3444 u_int32_t firstBlock; // First free block in current extent.
3445 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
3446 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
3447 u_int32_t *buffer = NULL;
3448 register u_int32_t *currentWord;
3449 register u_int32_t bitMask;
3450 register u_int32_t wordsLeft;
3451 register u_int32_t tempWord;
3452 uintptr_t blockRef = 0;
3453 u_int32_t wordsPerBlock;
3454 u_int32_t updated_free_extent = 0;
3455 struct hfsmount *hfsmp = (struct hfsmount*) vcb;
3456 HFSPlusExtentDescriptor best = { 0, 0 };
3457
3458 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3459 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
3460
3461 /*
3462 * When we're skipping the metadata zone and the start/end
3463 * range overlaps with the metadata zone then adjust the
3464 * start to be outside of the metadata zone. If the range
3465 * is entirely inside the metadata zone then we can deny the
3466 * request (dskFulErr).
3467 */
3468 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
3469 if (startingBlock <= vcb->hfs_metazone_end) {
3470 if (endingBlock > (vcb->hfs_metazone_end + 2))
3471 startingBlock = vcb->hfs_metazone_end + 1;
3472 else
3473 goto DiskFull;
3474 }
3475 }
3476
3477 if ((endingBlock - startingBlock) < minBlocks)
3478 {
3479 // The set of blocks we're checking is smaller than the minimum number
3480 // of blocks, so we couldn't possibly find a good range.
3481 goto DiskFull;
3482 }
3483
3484 stopBlock = endingBlock - minBlocks + 1;
3485 currentBlock = startingBlock;
3486 firstBlock = 0;
3487
3488 /*
3489 * Skip over metadata blocks.
3490 */
3491 if (!useMetaZone)
3492 currentBlock = NextBitmapBlock(vcb, currentBlock);
3493
3494 /*
3495 * Use the summary table if we can. Skip over any totally
3496 * allocated blocks. currentBlock should now point to the first
3497 * block beyond the metadata zone if the metazone allocations are not
3498 * allowed in this invocation.
3499 */
3500 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3501 uint32_t suggestion;
3502 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
3503 if (err && err != ENOSPC)
3504 goto ErrorExit;
3505 if (err == ENOSPC || suggestion >= stopBlock)
3506 goto DiskFull;
3507 currentBlock = suggestion;
3508 }
3509
3510
3511 //
3512 // Pre-read the first bitmap block.
3513 //
3514 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3515 if ( err != noErr ) goto ErrorExit;
3516
3517 //
3518 // Figure out where currentBlock is within the buffer.
3519 //
3520 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
3521
3522 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
3523 currentWord = buffer + wordsLeft;
3524 wordsLeft = wordsPerBlock - wordsLeft;
3525
3526 uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
3527 - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
3528 ? 0 : hfsmp->tentativeBlocks));
3529
3530 /*
3531 * This outer do-while loop is the main body of this function. Its job is
3532 * to search through the blocks (until we hit 'stopBlock'), and iterate
3533 * through swaths of allocated bitmap until it finds free regions.
3534 */
3535
3536 do
3537 {
3538 foundBlocks = 0;
3539 /*
3540 * We will try and update the summary table as we search
3541 * below. Note that we will never update the summary table
3542 * for the first and last blocks that the summary table
3543 * covers. Ideally, we should, but the benefits probably
3544 * aren't that significant so we leave things alone for now.
3545 */
3546 uint32_t summary_block_scan = 0;
3547 /*
3548 * Inner while loop 1:
3549 * Look for free blocks, skipping over allocated ones.
3550 *
3551 * Initialization starts with checking the initial partial word
3552 * if applicable.
3553 */
3554 bitMask = currentBlock & kBitsWithinWordMask;
3555 if (bitMask)
3556 {
3557 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3558 bitMask = kHighBitInWordMask >> bitMask;
3559 while (tempWord & bitMask)
3560 {
3561 bitMask >>= 1;
3562 ++currentBlock;
3563 }
3564
3565 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
3566 if (bitMask)
3567 goto FoundUnused;
3568
3569 // Didn't find any unused bits, so we're done with this word.
3570 ++currentWord;
3571 --wordsLeft;
3572 }
3573
3574 //
3575 // Check whole words
3576 //
3577 while (currentBlock < stopBlock)
3578 {
3579 // See if it's time to read another block.
3580 if (wordsLeft == 0)
3581 {
3582 buffer = NULL;
3583 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3584 /*
3585 * If summary_block_scan is non-zero, then we must have
3586 * pulled a bitmap file block into core, and scanned through
3587 * the entire thing. Because we're in this loop, we are
3588 * implicitly trusting that the summary table didn't have any knowledge
3589 * about this particular block. As a result, update the summary table
3590 * (lazily, now that we've scanned it) with our findings that
3591 * this particular block is completely used up.
3592 */
3593 if (summary_block_scan != 0) {
3594 uint32_t summary_bit;
3595 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
3596 hfs_set_summary (hfsmp, summary_bit, 1);
3597 summary_block_scan = 0;
3598 }
3599 }
3600 err = ReleaseBitmapBlock(vcb, blockRef, false);
3601 if (err != noErr) goto ErrorExit;
3602
3603 /*
3604 * Skip over metadata blocks.
3605 */
3606 if (!useMetaZone) {
3607 currentBlock = NextBitmapBlock(vcb, currentBlock);
3608 if (currentBlock >= stopBlock) {
3609 goto LoopExit;
3610 }
3611 }
3612
3613 /* Skip over fully allocated bitmap blocks if we can */
3614 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3615 uint32_t suggestion;
3616 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
3617 if (err && err != ENOSPC)
3618 goto ErrorExit;
3619 if (err == ENOSPC || suggestion >= stopBlock)
3620 goto LoopExit;
3621 currentBlock = suggestion;
3622 }
3623
3624 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3625 if ( err != noErr ) goto ErrorExit;
3626
3627 /*
3628 * Set summary_block_scan to be the block we just read into the block cache.
3629 *
3630 * At this point, we've just read an allocation block worth of bitmap file
3631 * into the buffer above, but we don't know if it is completely allocated or not.
3632 * If we find that it is completely allocated/full then we will jump
3633 * through this loop again and set the appropriate summary bit as fully allocated.
3634 */
3635 summary_block_scan = currentBlock;
3636 currentWord = buffer;
3637 wordsLeft = wordsPerBlock;
3638 }
3639
3640 // See if any of the bits are clear
3641 if ((tempWord = SWAP_BE32(*currentWord)) != kAllBitsSetInWord) // true if any bits were clear
3642 {
3643 // Figure out which bit is clear
3644 bitMask = kHighBitInWordMask;
3645 while (tempWord & bitMask)
3646 {
3647 bitMask >>= 1;
3648 ++currentBlock;
3649 }
3650
3651 break; // Found the free bit; break out to FoundUnused.
3652 }
3653
3654 // Keep looking at the next word
3655 currentBlock += kBitsPerWord;
3656 ++currentWord;
3657 --wordsLeft;
3658 }
3659
3660 FoundUnused:
3661 // Make sure the unused bit is early enough to use
3662 if (currentBlock >= stopBlock)
3663 {
3664 break;
3665 }
3666
3667 // Remember the start of the extent
3668 firstBlock = currentBlock;
3669
3670
3671 /*
3672 * Inner while loop 2:
3673 * We get here if we find a free block. Count the number
3674 * of contiguous free blocks observed.
3675 *
3676 * Initialization starts with checking the initial partial word
3677 * if applicable.
3678 */
3679 bitMask = currentBlock & kBitsWithinWordMask;
3680 if (bitMask)
3681 {
3682 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3683 bitMask = kHighBitInWordMask >> bitMask;
3684 while (bitMask && !(tempWord & bitMask))
3685 {
3686 bitMask >>= 1;
3687 ++currentBlock;
3688 }
3689
3690 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3691 if (bitMask)
3692 goto FoundUsed;
3693
3694 // Didn't find any used bits, so we're done with this word.
3695 ++currentWord;
3696 --wordsLeft;
3697 }
3698
3699 //
3700 // Check whole words
3701 //
3702 while (currentBlock < endingBlock)
3703 {
3704 // See if it's time to read another block.
3705 if (wordsLeft == 0)
3706 {
3707 buffer = NULL;
3708 err = ReleaseBitmapBlock(vcb, blockRef, false);
3709 if (err != noErr) goto ErrorExit;
3710
3711 /*
3712 * Skip over metadata blocks.
3713 */
3714 if (!useMetaZone) {
3715 u_int32_t nextBlock;
3716
3717 nextBlock = NextBitmapBlock(vcb, currentBlock);
3718 if (nextBlock != currentBlock) {
3719 goto LoopExit; /* allocation gap, so stop */
3720 }
3721 }
3722
3723 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3724 if ( err != noErr ) goto ErrorExit;
3725
3726 currentWord = buffer;
3727 wordsLeft = wordsPerBlock;
3728 }
3729
3730 // See if any of the bits are set
3731 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
3732 {
3733 // Figure out which bit is set
3734 bitMask = kHighBitInWordMask;
3735 while (!(tempWord & bitMask))
3736 {
3737 bitMask >>= 1;
3738 ++currentBlock;
3739 }
3740
3741 break; // Found the used bit; break out to FoundUsed.
3742 }
3743
3744 // Keep looking at the next word
3745 currentBlock += kBitsPerWord;
3746 ++currentWord;
3747 --wordsLeft;
3748
3749 // If we found at least maxBlocks, we can quit early.
3750 if ((currentBlock - firstBlock) >= maxBlocks)
3751 break;
3752 }
3753
3754 FoundUsed:
3755 // Make sure we didn't run out of bitmap looking for a used block.
3756 // If so, pin to the end of the bitmap.
3757 if (currentBlock > endingBlock)
3758 currentBlock = endingBlock;
3759
3760 // Figure out how many contiguous free blocks there were.
3761 // Pin the answer to maxBlocks.
3762 foundBlocks = currentBlock - firstBlock;
3763 if (foundBlocks > maxBlocks)
3764 foundBlocks = maxBlocks;
3765
3766 if (remaining) {
3767 if (foundBlocks > remaining) {
3768 #if DEBUG || DEVELOPMENT
3769 printf("hfs: found more blocks than are indicated free!\n");
3770 #endif
3771 remaining = UINT32_MAX;
3772 } else
3773 remaining -= foundBlocks;
3774 }
3775
3776 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3777 if (foundBlocks > best.blockCount) {
3778 best.startBlock = firstBlock;
3779 best.blockCount = foundBlocks;
3780 }
3781
3782 if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
3783 break;
3784
3785 /*
3786 * Note that we will go ahead and add this free extent to our
3787 * cache below but that's OK because we'll remove it again if we
3788 * decide to use this extent.
3789 */
3790 } else if (foundBlocks >= minBlocks)
3791 break; // Found what we needed!
3792
3793 /*
3794 * We did not find the total blocks we were looking for, but
3795 * add this free block run to our free extent cache list, if possible.
3796 */
3797
3798 // If we're ignoring tentative ranges, we need to account for them here
3799 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
3800 struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
3801 struct rl_entry *range;
3802 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
3803 rl_subtract(&free_extent, range);
3804 if (rl_len(&free_extent) == 0) // stop once the free extent is fully consumed
3805 break;
3806 }
3807 firstBlock = free_extent.rl_start;
3808 foundBlocks = rl_len(&free_extent);
3809 }
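/*
 * e.g. (hypothetical): a found extent covering blocks 100-199 with a
 * tentative reservation over 150-199 is trimmed to 100-149 here before
 * being considered for the free extent cache.
 */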
3810
3811 if (foundBlocks) {
3812 if (hfsmp->jnl == NULL) {
3813 /* If there is no journal, go ahead and add to the free ext cache. */
3814 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3815 }
3816 else {
3817 /*
3818 * If journaled, only add to the free extent cache if this block is not
3819 * waiting for a TRIM to complete; that implies that the transaction that freed it
3820 * has not yet been committed to stable storage.
3821 */
3822 int recently_deleted = 0;
3823 uint32_t nextblock;
3824 err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
3825 (uint64_t)foundBlocks, &recently_deleted, &nextblock);
3826 if ((err) || (recently_deleted == 0)) {
3827 /* if we hit an error, or the blocks were not recently freed, go ahead and insert it */
3828 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3829 }
3830 err = 0;
3831 }
3832 }
3833 } while (currentBlock < stopBlock);
3834 LoopExit:
3835
3836 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3837 firstBlock = best.startBlock;
3838 foundBlocks = best.blockCount;
3839 }
3840
3841 // Return the outputs.
3842 if (foundBlocks < minBlocks)
3843 {
3844 DiskFull:
3845 err = dskFulErr;
3846 ErrorExit:
3847 *actualStartBlock = 0;
3848 *actualNumBlocks = 0;
3849 }
3850 else
3851 {
3852 err = noErr;
3853 *actualStartBlock = firstBlock;
3854 *actualNumBlocks = foundBlocks;
3855 /*
3856 * Sanity check for overflow
3857 */
3858 if ((firstBlock + foundBlocks) > vcb->allocLimit) {
3859 panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
3860 vcb->vcbVN, startingBlock, endingBlock, currentBlock,
3861 firstBlock, stopBlock, minBlocks, foundBlocks);
3862 }
3863 }
3864
3865 if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
3866 int i;
3867 u_int32_t min_start = vcb->totalBlocks;
3868
3869 // set the nextAllocation pointer to the smallest free block number
3870 // we've seen, so that on the next mount we won't rescan unnecessarily
3871 lck_spin_lock(&vcb->vcbFreeExtLock);
3872 for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
3873 if (vcb->vcbFreeExt[i].startBlock < min_start) {
3874 min_start = vcb->vcbFreeExt[i].startBlock;
3875 }
3876 }
3877 lck_spin_unlock(&vcb->vcbFreeExtLock);
3878 if (min_start != vcb->totalBlocks) {
3879 if (min_start < vcb->nextAllocation) {
3880 vcb->nextAllocation = min_start;
3881 }
3882 if (min_start < vcb->sparseAllocation) {
3883 vcb->sparseAllocation = min_start;
3884 }
3885 }
3886 }
3887
3888 if (buffer)
3889 (void) ReleaseBitmapBlock(vcb, blockRef, false);
3890
3891 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3892 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
3893
3894 return err;
3895 }
3896
3897
3898 /*
3899 * Count number of bits set in the given 32-bit unsigned number
3900 *
3901 * Returns:
3902 * Number of bits set
3903 */
3904 static int num_bits_set(u_int32_t num)
3905 {
3906 int count;
3907
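/*
 * This is Kernighan's bit-counting trick: num &= num - 1 clears the
 * lowest set bit, so the loop body runs once per set bit. For example,
 * num = 0xB0 (binary 10110000) takes exactly three iterations. On
 * compilers that provide it, this is equivalent to __builtin_popcount(num).
 */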
3908 for (count = 0; num; count++) {
3909 num &= num - 1;
3910 }
3911
3912 return count;
3913 }
3914
3915 /*
3916 * For a given range of blocks, find the total number of blocks
3917 * allocated. If 'stop_on_first' is true, it stops as soon as it
3918 * encounters the first allocated block. This option is useful
3919 * to determine if any block is allocated or not.
3920 *
3921 * Inputs:
3922 * startingBlock First allocation block number of the range to be scanned.
3923 * numBlocks Total number of blocks that need to be scanned.
3924 * stop_on_first Stop the search after the first allocated block is found.
3925 *
3926 * Output:
3927 * allocCount Total number of allocation blocks allocated in the given range.
3928 *
3929 * On error, it is the number of allocated blocks found
3930 * before the function got an error.
3931 *
3932 * If 'stop_on_first' is set,
3933 * allocCount = 1 if any allocated block was found.
3934 * allocCount = 0 if no allocated block was found.
3935 *
3936 * Returns:
3937 * 0 on success, non-zero on failure.
3938 */
3939 static int
3940 hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
3941 u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
3942 {
3943 u_int32_t *currentWord; // Pointer to current word within bitmap block
3944 u_int32_t wordsLeft; // Number of words left in this bitmap block
3945 u_int32_t bitMask; // Word with given bits already set (ready to test)
3946 u_int32_t firstBit; // Bit index within word of first bit to check
3947 u_int32_t numBits; // Number of bits in word to check
3948 u_int32_t *buffer = NULL;
3949 uintptr_t blockRef;
3950 u_int32_t bitsPerBlock;
3951 u_int32_t wordsPerBlock;
3952 u_int32_t blockCount = 0;
3953 int error;
3954
3955 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3956 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
3957
3958 /*
3959 * Pre-read the bitmap block containing the first word of allocation
3960 */
3961 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
3962 HFS_ALLOC_IGNORE_TENTATIVE);
3963 if (error)
3964 goto JustReturn;
3965
3966 /*
3967 * Initialize currentWord, and wordsLeft.
3968 */
3969 {
3970 u_int32_t wordIndexInBlock;
3971
3972 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3973 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
3974
3975 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3976 currentWord = buffer + wordIndexInBlock;
3977 wordsLeft = wordsPerBlock - wordIndexInBlock;
3978 }
3979
3980 /*
3981 * First test any non word aligned bits.
3982 */
3983 firstBit = startingBlock % kBitsPerWord;
3984 if (firstBit != 0) {
3985 bitMask = kAllBitsSetInWord >> firstBit;
3986 numBits = kBitsPerWord - firstBit;
3987 if (numBits > numBlocks) {
3988 numBits = numBlocks;
3989 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
3990 }
3991 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3992 if (stop_on_first) {
3993 blockCount = 1;
3994 goto Exit;
3995 }
3996 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3997 }
3998 numBlocks -= numBits;
3999 ++currentWord;
4000 --wordsLeft;
4001 }
4002
4003 /*
4004 * Test whole words (32 blocks) at a time.
4005 */
4006 while (numBlocks >= kBitsPerWord) {
4007 if (wordsLeft == 0) {
4008 /* Read in the next bitmap block. */
4009 startingBlock += bitsPerBlock;
4010
4011 buffer = NULL;
4012 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
4013 if (error) goto Exit;
4014
4015 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
4016 HFS_ALLOC_IGNORE_TENTATIVE);
4017 if (error) goto Exit;
4018
4019 /* Readjust currentWord and wordsLeft. */
4020 currentWord = buffer;
4021 wordsLeft = wordsPerBlock;
4022 }
4023 if (*currentWord != 0) {
4024 if (stop_on_first) {
4025 blockCount = 1;
4026 goto Exit;
4027 }
4028 blockCount += num_bits_set(*currentWord);
4029 }
4030 numBlocks -= kBitsPerWord;
4031 ++currentWord;
4032 --wordsLeft;
4033 }
4034
4035 /*
4036 * Test any remaining blocks.
4037 */
4038 if (numBlocks != 0) {
4039 bitMask = ~(kAllBitsSetInWord >> numBlocks);
4040 if (wordsLeft == 0) {
4041 /* Read in the next bitmap block */
4042 startingBlock += bitsPerBlock;
4043
4044 buffer = NULL;
4045 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
4046 if (error) goto Exit;
4047
4048 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
4049 HFS_ALLOC_IGNORE_TENTATIVE);
4050 if (error) goto Exit;
4051
4052 currentWord = buffer;
4053 wordsLeft = wordsPerBlock;
4054 }
4055 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
4056 if (stop_on_first) {
4057 blockCount = 1;
4058 goto Exit;
4059 }
4060 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
4061 }
4062 }
4063 Exit:
4064 if (buffer) {
4065 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
4066 }
4067 if (allocCount) {
4068 *allocCount = blockCount;
4069 }
4070
4071 JustReturn:
4072 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
4073 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
4074
4075 return (error);
4076 }
4077
4078 /*
4079 * Count total number of blocks that are allocated in the given
4080 * range from the bitmap. This is used to preflight total blocks
4081 * that need to be relocated during volume resize.
4082 *
4083 * The journal or allocation file lock must be held.
4084 *
4085 * Returns:
4086 * 0 on success, non-zero on failure.
4087 * On failure, allocCount is zero.
4088 */
4089 int
4090 hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
4091 u_int32_t numBlocks, u_int32_t *allocCount)
4092 {
4093 return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
4094 }
4095
4096 /*
4097 * Test to see if any blocks in a range are allocated.
4098 *
4099 * Note: On error, this function returns 1, which means that
4100 * one or more blocks in the range are allocated. This function
4101 * is primarily used for volume resize and we do not want
4102 * to report to the caller that the blocks are free when we
4103 * were not able to deterministically verify that. So on error,
4104 * we always report that the blocks are allocated.
4105 *
4106 * The journal or allocation file lock must be held.
4107 *
4108 * Returns
4109 * 0 if all blocks in the range are free.
4110 * 1 if blocks in the range are allocated, or there was an error.
4111 */
4112 int
4113 hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
4114 {
4115 int error;
4116 u_int32_t allocCount;
4117
4118 error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
4119 if (error) {
4120 /* On error, we always say that the blocks are allocated
4121 * so that volume resize does not return false success.
4122 */
4123 return 1;
4124 } else {
4125 /* The function was deterministically able to find out
4126 * if there was any block allocated or not. In that case,
4127 * the value in allocCount is good enough to be returned
4128 * back to the caller.
4129 */
4130 return allocCount;
4131 }
4132 }
4133
4134 /*
4135 * CONFIG_HFS_RBTREE
4136 * Check to see if the red-black tree is live. Allocation file lock must be held
4137 * shared or exclusive to call this function. Note that we may call this even if
4138 * HFS is built without activating the red-black tree code.
4139 */
4140 __private_extern__
4141 int
4142 hfs_isrbtree_active(struct hfsmount *hfsmp){
4143
4144 #pragma unused (hfsmp)
4145
4146 /* Just return 0 for now */
4147 return 0;
4148 }
4149
4150
4151
4152 /* Summary Table Functions */
4153 /*
4154 * hfs_check_summary:
4155 *
4156 * This function should be used to query the summary table to see if we can
4157 * bypass a bitmap block or not when we're trying to find a free allocation block.
4158 *
4159 *
4160 * Inputs:
4161 * allocblock - allocation block number. Will be used to infer the correct summary bit.
4162 * hfsmp -- filesystem in question.
4163 *
4164 * Output Arg:
4165 * *freeblocks - set to 1 if we believe there is at least one free block in this
4166 * vcbVBMIOSize page of the bitmap file.
4167 *
4168 *
4169 * Returns:
4170 * 0 on success
4171 * EINVAL on error
4172 *
4173 */
4174
4175 static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
4176
4177 int err = EINVAL;
4178 if (hfsmp->vcbVBMIOSize) {
4179 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4180 uint32_t index;
4181 if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
4182 *freeblocks = 0;
4183 return EINVAL;
4184 }
4185
4186 /* Ok, now that we have the bit index into the array, what byte is it in ? */
4187 uint32_t byteindex = index / kBitsPerByte;
4188 uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
4189 uint8_t bit_in_byte = index % kBitsPerByte;
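/* e.g. (hypothetical) index = 19: byteindex = 19 / 8 = 2 and
 * bit_in_byte = 19 % 8 = 3, so we test bit 3 of the third byte. */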
4190
4191 if (current_byte & (1 << bit_in_byte)) {
4192 /*
4193 * We do not believe there is anything free in the
4194 * entire vcbVBMIOSize'd block.
4195 */
4196 *freeblocks = 0;
4197 }
4198 else {
4199 /* Looks like there might be a free block here... */
4200 *freeblocks = 1;
4201 }
4202 }
4203 err = 0;
4204 }
4205
4206 return err;
4207 }
4208
4209
4210 #if 0
4211 /*
4212 * hfs_get_next_summary
4213 *
4214 * From a given allocation block, jump to the allocation block at the start of the
4215 * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over
4216 * large swaths of bitmap once we have determined that the bitmap is relatively full.
4217 *
4218 * Inputs: hfsmount, starting allocation block number
4219 * Output Arg: *newblock will contain the allocation block number to start
4220 * querying.
4221 *
4222 * Returns:
4223 * 0 on success
4224 * EINVAL if the block argument is too large to be used, or the summary table is not live.
4225 * EFBIG if there are no more summary bits to be queried
4226 */
4227 static int
4228 hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
4229
4230 u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4231 u_int32_t start_offset;
4232 u_int32_t next_offset;
4233 int err = EINVAL;
4234
4235 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4236 if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
4237 return err;
4238 }
4239
4240 next_offset = start_offset + 1; // the summary bit after the one containing 'block'
4241
4242 if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
4243 /* Can't jump to the next summary bit. */
4244 return EINVAL;
4245 }
4246
4247 /* Otherwise, compute and return */
4248 *newblock = next_offset * bits_per_iosize;
4249 if (*newblock >= hfsmp->totalBlocks) {
4250 return EINVAL;
4251 }
4252 err = 0;
4253 }
4254
4255 return err;
4256 }
4257
4258 #endif
4259
4260 /*
4261 * hfs_release_summary
4262 *
4263 * Given an extent that is about to be de-allocated on-disk, determine the number
4264 * of summary bitmap bits that need to be marked as 'potentially available'.
4265 * Then go ahead and mark them as free.
4266 *
4267 * Inputs:
4268 * hfsmp - hfs mount
4269 * block - starting allocation block.
4270 * length - length of the extent.
4271 *
4272 * Returns:
4273 * 0 on success; EINVAL upon any errors.
4274 */
4275 static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
4276 int err = EINVAL;
4277 uint32_t end_blk = (start_blk + length) - 1;
4278
4279 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4280 /* Figure out what the starting / ending block's summary bits are */
4281 uint32_t start_bit;
4282 uint32_t end_bit;
4283 uint32_t current_bit;
4284
4285 err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
4286 if (err) {
4287 goto release_err;
4288 }
4289 err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
4290 if (err) {
4291 goto release_err;
4292 }
4293
4294 if (ALLOC_DEBUG) {
4295 if (start_bit > end_bit) {
4296 panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
4297 }
4298 }
4299 current_bit = start_bit;
4300 while (current_bit <= end_bit) {
4301 err = hfs_set_summary (hfsmp, current_bit, 0);
4302 current_bit++;
4303 }
4304 }
4305
4306 release_err:
4307 return err;
4308 }
4309
4310 /*
4311 * hfs_find_summary_free
4312 *
4313 * Given an allocation block as input, returns an allocation block number as a
4314 * suggestion for where to start scanning the bitmap in order to find free blocks. It will
4315 * determine the vcbVBMIOSize page of the input allocation block, convert that into a summary
4316 * bit, then keep iterating over the summary bits in order to find the first free one.
4317 *
4318 * Inputs:
4319 * hfsmp - hfs mount
4320 * block - starting allocation block
4321 * newblock - output block as suggestion
4322 *
4323 * Returns:
4324 * 0 on success
4325 * ENOSPC if we could not find a free block
4326 */
4327
4328 int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
4329
4330 int err = ENOSPC;
4331 uint32_t bit_index = 0;
4332 uint32_t maybe_has_blocks = 0;
4333
4334 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4335 uint32_t byte_index;
4336 uint8_t curbyte;
4337 uint8_t bit_in_byte;
4338 uint32_t summary_cap;
4339
4340 /*
4341 * We generate a cap for the summary search because the summary table
4342 * always represents a full summary of the bitmap FILE, which may
4343 * be way more bits than are necessary for the actual filesystem
4344 * whose allocations are mapped by the bitmap.
4345 *
4346 * Compute how much of hfs_summary_size is usable for the given number
4347 * of allocation blocks eligible on this FS.
4348 */
4349 err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
4350 if (err) {
4351 goto summary_exit;
4352 }
4353
4354 /* Check the starting block first */
4355 err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
4356 if (err) {
4357 goto summary_exit;
4358 }
4359
4360 if (maybe_has_blocks) {
4361 /*
4362 * It looks like the initial start block could have something.
4363 * Short-circuit and just use that.
4364 */
4365 *newblock = block;
4366 goto summary_exit;
4367 }
4368
4369 /*
4370 * OK, now we know that the first block was useless.
4371 * Get the starting summary bit, and find it in the array
4372 */
4373 maybe_has_blocks = 0;
4374 err = hfs_get_summary_index (hfsmp, block, &bit_index);
4375 if (err) {
4376 goto summary_exit;
4377 }
4378
4379 /* Iterate until we find something. */
4380 while (bit_index <= summary_cap) {
4381 byte_index = bit_index / kBitsPerByte;
4382 curbyte = hfsmp->hfs_summary_table[byte_index];
4383 bit_in_byte = bit_index % kBitsPerByte;
4384
4385 if (curbyte & (1 << bit_in_byte)) {
4386 /* nothing here. increment and move on */
4387 bit_index++;
4388 }
4389 else {
4390 /*
4391 * found something! convert bit_index back into
4392 * an allocation block for use. 'newblock' will now
4393 * contain the proper allocation block # based on the bit
4394 * index.
4395 */
4396 err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
4397 if (err) {
4398 goto summary_exit;
4399 }
4400 maybe_has_blocks = 1;
4401 break;
4402 }
4403 }
4404
4405 /* If our loop didn't find anything, set err to ENOSPC */
4406 if (maybe_has_blocks == 0) {
4407 err = ENOSPC;
4408 }
4409 }
4410
4411 /* If the summary table is not active for this mount, we'll just return ENOSPC */
4412 summary_exit:
4413 if (maybe_has_blocks) {
4414 err = 0;
4415 }
4416
4417 return err;
4418 }
4419
4420 /*
4421 * hfs_get_summary_allocblock
4422 *
4423 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
4424 *
4425 * Inputs:
4426 * hfsmp - hfs mount
4427 * summarybit - summary bit index
4428 * *alloc - output: allocation block number corresponding to the summary bit.
4429 *
4430 * Returns:
4431 * 0 on success
4432 * EINVAL on failure
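*
* Example (hypothetical numbers): with vcbVBMIOSize = 4096 bytes,
* bits_per_iosize = 32768, so summary bit 3 maps to allocation block 98304.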
4433 */
4434 int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
4435 summarybit, uint32_t *alloc) {
4436 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4437 uint32_t allocblk;
4438
4439 allocblk = summarybit * bits_per_iosize;
4440
4441 if (allocblk >= hfsmp->totalBlocks) {
4442 return EINVAL;
4443 }
4444 else {
4445 *alloc = allocblk;
4446 }
4447
4448 return 0;
4449 }
4450
4451
4452 /*
4453 * hfs_set_summary:
4454 *
4455 * This function should be used to manipulate the summary table
4456 *
4457 * The bit in question is set to one or zero depending on the value
4458 * of the 'inuse' argument.
4459 *
4460 * Inputs:
4461 * hfsmp - hfs mount
4462 * summarybit - the bit index into the summary table to set/unset.
4463 * inuse - the value to assign to the bit.
4464 *
4465 * Returns:
4466 * 0 on success
4467 * EINVAL on error
4468 *
4469 */
4470
4471 static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
4472
4473 int err = EINVAL;
4474 if (hfsmp->vcbVBMIOSize) {
4475 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4476
4477 if (ALLOC_DEBUG) {
4478 if (hfsmp->hfs_summary_table == NULL) {
4479 panic ("hfs_set_summary: no table for %p ", hfsmp);
4480 }
4481 }
4482
4483 /* Ok, now that we have the bit index into the array, what byte is it in ? */
4484 uint32_t byte_index = summarybit / kBitsPerByte;
4485 uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
4486 uint8_t bit_in_byte = summarybit % kBitsPerByte;
4487
4488 if (inuse) {
4489 current_byte = (current_byte | (1 << bit_in_byte));
4490 }
4491 else {
4492 current_byte = (current_byte & ~(1 << bit_in_byte));
4493 }
4494
4495 hfsmp->hfs_summary_table[byte_index] = current_byte;
4496 }
4497 err = 0;
4498 }
4499
4500 return err;
4501 }
4502
4503
4504 /*
4505 * hfs_get_summary_index:
4506 *
4507 * This is a helper function which determines which summary bit represents the
4508 * vcbVBMIOSize worth of bitmap covering the given allocation block.
4509 *
4510 * Returns:
4511 * 0 on success
4512 * EINVAL on failure
4513 */
4514 static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
4515 uint32_t summary_bit;
4516 uint32_t bits_per_iosize;
4517 int err = EINVAL;
4518
4519 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4520 /* Is the input block bigger than the total number of blocks? */
4521 if (block >= hfsmp->totalBlocks) {
4522 return EINVAL;
4523 }
4524
4525 /* Is there even a vbmIOSize set? */
4526 if (hfsmp->vcbVBMIOSize == 0) {
4527 return EINVAL;
4528 }
4529
4530 bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4531
4532 summary_bit = block / bits_per_iosize;
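/* e.g. (hypothetical): vcbVBMIOSize = 4096 -> bits_per_iosize = 32768,
 * so allocation block 100000 lands in summary bit 100000 / 32768 = 3. */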
4533
4534 *index = summary_bit;
4535 err = 0;
4536 }
4537
4538 return err;
4539 }
4540
4541 /*
4542 * hfs_init_summary
4543 *
4544 * From a given mount structure, compute how big the summary table should be for the given
4545 * filesystem, then allocate and bzero the memory.
4546 *
4547 * Returns:
4548 * 0 on success
4549 * EINVAL on failure
4550 */
4551 int
4552 hfs_init_summary (struct hfsmount *hfsmp) {
4553
4554 uint32_t summary_size;
4555 uint32_t summary_size_bytes;
4556 uint8_t *summary_table;
4557
4558 if (hfsmp->hfs_allocation_cp == NULL) {
4559 if (ALLOC_DEBUG) {
4560 printf("hfs: summary table cannot progress without a bitmap cnode! \n");
4561 }
4562 return EINVAL;
4563 }
4564 /*
4565 * The practical maximum size of the summary table is 16KB:
4566 *
4567 * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
4568 *
4569 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
4570 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
4571 * supporting this much worse. The math would instead look like this:
4572 * (512MB / 2k) / 8 == 32k.
4573 *
4574 * So, we will disallow the summary table if the allocation block size is < 4k.
4575 */
4576
4577 if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
4578 printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
4579 return EINVAL;
4580 }
4581
4582 summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4583
4584 if (ALLOC_DEBUG) {
4585 printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
4586 hfsmp->hfs_allocation_cp->c_blocks);
4587 }
4588
4589 /*
4590 * If the bitmap IO size is not the same as the allocation block size,
4591 * then re-compute the number of summary bits necessary. Note that above,
4592 * the default size is the number of allocation blocks in the bitmap *FILE*
4593 * (not the number of bits in the bitmap itself). If the allocation block size
4594 * is large enough though, we may need to increase this.
4595 */
4596 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4597 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4598 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4599
4600 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4601 summary_size = (uint32_t) lrg_size;
4602 }
4603
4604 /*
4605 * If the block size is the same as the IO Size, then the total number of blocks
4606 * is already equal to the number of IO units, which is our number of summary bits.
4607 */
4608
4609 summary_size_bytes = summary_size / kBitsPerByte;
4610 /* Always add one byte, just in case we have a dangling number of bits */
4611 summary_size_bytes++;
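/*
 * Worked example (hypothetical): a volume whose bitmap file occupies
 * 32 MB, with vcbVBMIOSize = 4096, needs 32 MB / 4 KB = 8192 summary
 * bits, i.e. 1024 bytes plus the one slop byte = 1025 bytes of table.
 */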
4612
4613 if (ALLOC_DEBUG) {
4614 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
4615 printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
4616 }
4617
4618 /* Store the field in the mount point, and then MALLOC/bzero the memory */
4619 hfsmp->hfs_summary_size = summary_size;
4620 hfsmp->hfs_summary_bytes = summary_size_bytes;
4621
4622 MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK);
4623 if (summary_table == NULL) {
4624 return ENOMEM;
4625 }
4626 bzero (summary_table, summary_size_bytes);
4627
4628 /* enable the summary table */
4629 hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
4630 hfsmp->hfs_summary_table = summary_table;
4631
4632 if (ALLOC_DEBUG) {
4633 if (hfsmp->hfs_summary_table == NULL) {
4634 panic ("HFS Summary Init: no table for %p\n", hfsmp);
4635 }
4636 }
4637 return 0;
4638 }
4639
4640 /*
4641 * hfs_rebuild_summary
4642 *
4643 * This function should be used to allocate a new hunk of memory for use as a summary
4644 * table, then copy the existing data into it. We use it whenever the filesystem's size
4645 * changes. When a resize is in progress, you can still use the extant summary
4646 * table if it is active.
4647 *
4648 * Inputs:
4649 * hfsmp -- FS in question
4650 * newlength -- new length of the FS in allocation blocks.
4651 *
4652 * Returns:
4653 * 0 on success, EINVAL on failure. If this function fails, the summary table
4654 * will be disabled for future use.
4655 *
4656 */
4657 static int hfs_rebuild_summary (struct hfsmount *hfsmp) {
4658
4659 uint32_t new_summary_size;
4660
4661 new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4662
4663
4664 if (ALLOC_DEBUG) {
4665 printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
4666 }
4667
4668 /*
4669 * If the bitmap IO size is not the same as the allocation block size, then re-compute
4670 * the number of summary bits necessary. Note that above, the default size is the number
4671 * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
4672 * If the allocation block size is large enough though, we may need to increase this, as
4673 * bitmap IO is capped at 64k per IO
4674 */
4675 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4676 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4677 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4678
4679 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4680 new_summary_size = (uint32_t) lrg_size;
4681 }
4682
4683 /*
4684 * Ok, we have the new summary bitmap theoretical max size. See if it's the same as
4685 * what we've got already...
4686 */
4687 if (new_summary_size != hfsmp->hfs_summary_size) {
4688 uint32_t summarybytes = new_summary_size / kBitsPerByte;
4689 uint32_t copysize;
4690 uint8_t *newtable;
4691 /* Add one byte for slop */
4692 summarybytes++;
4693
4694 if (ALLOC_DEBUG) {
4695 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
4696 printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
4697 }
4698
4699 /* Attempt to MALLOC the memory */
4700 MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK);
4701 if (newtable == NULL) {
4702 /*
4703 * ERROR! We need to disable the table now
4704 */
4705 FREE (hfsmp->hfs_summary_table, M_TEMP);
4706 hfsmp->hfs_summary_table = NULL;
4707 hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
4708 return EINVAL;
4709 }
4710 bzero (newtable, summarybytes);
4711
4712 /*
4713 * The new table may be smaller than the old one. If this is true, then
4714 * we can't copy the full size of the existing summary table into the new
4715 * one.
4716 *
4717 * The converse is not an issue since we bzeroed the table above.
4718 */
4719 copysize = hfsmp->hfs_summary_bytes;
4720 if (summarybytes < hfsmp->hfs_summary_bytes) {
4721 copysize = summarybytes;
4722 }
4723 memcpy (newtable, hfsmp->hfs_summary_table, copysize);
4724
4725 /* We're all good. Destroy the old copy and update ptrs */
4726 FREE (hfsmp->hfs_summary_table, M_TEMP);
4727
4728 hfsmp->hfs_summary_table = newtable;
4729 hfsmp->hfs_summary_size = new_summary_size;
4730 hfsmp->hfs_summary_bytes = summarybytes;
4731 }
4732
4733 return 0;
4734 }
4735
4736
4737 #if ALLOC_DEBUG
4738 /*
4739 * hfs_validate_summary
4740 *
4741 * Validation routine for the summary table. Debug-only function.
4742 *
4743 * Bitmap lock must be held.
4744 *
4745 */
4746 void hfs_validate_summary (struct hfsmount *hfsmp) {
4747 uint32_t i;
4748 int err;
4749
4750 /*
4751 * Iterate over all of the bits in the summary table, and verify that
4752 * there really are free blocks in the pages that we believe may
4753 * contain free blocks.
4754 */
4755
4756 if (hfsmp->hfs_summary_table == NULL) {
4757 panic ("HFS Summary: No HFS summary table!");
4758 }
4759
4760 /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. we add 1 byte for slop */
4761 if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
4762 panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
4763 }
4764
4765 if (hfsmp->vcbVBMIOSize == 0) {
4766 panic("HFS Summary: no VCB VBM IO Size !");
4767 }
4768
4769 printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
4770 printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);
4771
4772
4773 /* iterate through all possible summary bits */
4774 for (i = 0; i < hfsmp->hfs_summary_size ; i++) {
4775
4776 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4777 uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;
4778
4779 /* Compute the corresponding allocation block for the summary bit. */
4780 uint32_t alloc_block = i * bits_per_iosize;
4781
4782 /*
4783 * We use a uint32_t pointer here because it will speed up
4784 * access to the real bitmap data on disk.
4785 */
4786 uint32_t *block_data;
4787 struct buf *bp;
4788 int counter;
4789 int counter_max;
4790 int saw_free_bits = 0;
4791
4792 /* Get the block */
4793 if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) {
4794 panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
4795 }
4796
4797 /* Query the status of the bit and then make sure we match */
4798 uint32_t maybe_has_free_blocks;
4799 err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
4800 if (err) {
4801 panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
4802 }
4803 counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;
4804
4805 for (counter = 0; counter < counter_max; counter++) {
4806 uint32_t word = block_data[counter];
4807
4808 /* We assume that we'll not find any free bits here. */
4809 if (word != kAllBitsSetInWord) {
4810 if (maybe_has_free_blocks) {
4811 /* All done */
4812 saw_free_bits = 1;
4813 break;
4814 }
4815 else {
4816 panic ("HFS Summary: hfs_check_summary saw free bits!");
4817 }
4818 }
4819 }
4820
4821 if (maybe_has_free_blocks && (saw_free_bits == 0)) {
4822 panic ("HFS Summary: did not see free bits !");
4823 }
4824
4825 /* Release the block. */
4826 if ((err = ReleaseScanBitmapRange (bp))) {
4827 panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
4828 }
4829 }
4830
4831 printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);
4832
4833 return;
4834 }
4835 #endif
4836
4837 /*
4838 * hfs_alloc_scan_range:
4839 *
4840 * This function should be used to scan large ranges of the allocation bitmap
4841 * at one time. It makes two key assumptions:
4842 *
4843 * 1) Bitmap lock is held during the duration of the call (exclusive)
4844 * 2) There are no pages in the buffer cache for any of the bitmap
4845 * blocks that we may encounter. It *MUST* be completely empty.
4846 *
4847 * The expected use case is when we are scanning the bitmap in full while we are
4848 * still mounting the filesystem in order to issue TRIMs or build up the summary
4849 * table for the mount point. It should be done after any potential journal replays
4850 * are completed and their I/Os fully issued.
4851 *
4852 * The key reason for assumption (2) above is that this function will try to issue
4853 * I/O against the bitmap file in chunks as large as possible -- essentially as
4854 * much as the buffer layer will handle (1MB). Because the size of these I/Os
4855 * is larger than what would be expected during normal runtime we must invalidate
4856 * the buffers as soon as we are done with them so that they do not persist in
4857 * the buffer cache for other threads to find, as they'll typically be doing
4858 * allocation-block size I/Os instead.
4859 *
4860 * Input Args:
4861 * hfsmp - hfs mount data structure
4862 * startbit - allocation block # to start our scan. It must be aligned
4863 * on a vcbVBMIOsize boundary.
4864 * list - journal trim list data structure for issuing TRIMs
4865 *
4866 * Output Args:
4867 * bitToScan - Return the next bit to scan if this function is called again.
4868 * Caller will supply this into the next invocation
4869 * of this call as 'startbit'.
4870 */
4871
4872 static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4873 u_int32_t *bitToScan, struct jnl_trim_list *list) {
4874
4875 int error;
4876 int readwrite = 1;
4877 u_int32_t curAllocBlock;
4878 struct buf *blockRef = NULL;
4879 u_int32_t *buffer = NULL;
4880 u_int32_t free_offset = 0; //tracks the start of the current free range
4881 u_int32_t size = 0; // tracks the length of the current free range.
4882 u_int32_t iosize = 0; //how much io we should generate against the bitmap
4883 u_int32_t byte_off; // byte offset into the bitmap file.
4884 u_int32_t completed_size; // how much io was actually completed
4885 u_int32_t last_bitmap_block;
4886 u_int32_t current_word;
4887 u_int32_t word_index = 0;
4888
4889 /* summary table building */
4890 uint32_t summary_bit = 0;
4891 uint32_t saw_free_blocks = 0;
4892 uint32_t last_marked = 0;
4893
4894 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4895 readwrite = 0;
4896 }
4897
4898 /*
4899 * Compute how much I/O we should generate here.
4900 * hfs_scan_range_size will validate that the start bit
4901 * converted into a byte offset into the bitmap file,
4902 * is aligned on a VBMIOSize boundary.
4903 */
4904 error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4905 if (error) {
4906 if (ALLOC_DEBUG) {
4907 panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4908 }
4909 return error;
4910 }
4911
4912 if (iosize < hfsmp->vcbVBMIOSize) {
4913 if (ALLOC_DEBUG) {
4914 panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4915 }
4916 return EINVAL;
4917 }
4918
4919 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
4920 byte_off = startbit / kBitsPerByte;
4921
4922 /*
4923 * When the journal replays blocks, it does so by writing directly to the disk
4924 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
4925 * it also immediately re-reads and invalidates the range covered by the bp so
4926 * it does not leave anything lingering in the cache (for iosize reasons).
4927 *
4928 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4929 *
4930 * NOTE: It is not recommended, but it is possible to call the function below
4931 * on sections of the bitmap that may be in core already as long as the pages are not
4932 * dirty. In that case, we'd notice that something starting at that
4933 * logical block of the bitmap exists in the metadata cache, and we'd check
4934 * if the iosize requested is the same as what was already allocated for it.
4935 * Odds are pretty good we're going to request something larger. In that case,
4936 * we just free the existing memory associated with the buf and reallocate a
4937 * larger range. This function should immediately invalidate it as soon as we're
4938 * done scanning, so this shouldn't cause any coherency issues.
4939 */
4940
4941 error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4942 if (error) {
4943 if (ALLOC_DEBUG) {
4944 panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4945 }
4946 return error;
4947 }
4948
4949 /*
4950 * At this point, we have a giant wired buffer that represents some portion of
4951 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
4952 * bytes though, so clip our ending bit to what we actually read in.
4953 */
4954 completed_size = buf_count(blockRef);
4955 last_bitmap_block = completed_size * kBitsPerByte;
4956 last_bitmap_block = last_bitmap_block + startbit;
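/* e.g. a maximal 1 MB read covers 1048576 * 8 = 8388608 bits past 'startbit' */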
4957
4958 /* Cap the last block to the total number of blocks if required */
4959 if (last_bitmap_block > hfsmp->totalBlocks) {
4960 last_bitmap_block = hfsmp->totalBlocks;
4961 }
4962
4963 /* curAllocBlock represents the logical block we're analyzing. */
4964 curAllocBlock = startbit;
4965 word_index = 0;
4966 size = 0;
4967
4968 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4969 if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4970 error = EINVAL;
4971 if (ALLOC_DEBUG) {
4972 panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4973 }
4974 return error;
4975 }
4976 /*
4977 * summary_bit should now be set to the summary bit corresponding to
4978 * the allocation block of the first bit that we're supposed to scan
4979 */
4980 }
4981 saw_free_blocks = 0;
4982
4983 while (curAllocBlock < last_bitmap_block) {
4984 u_int32_t bit;
4985
4986 /* Update the summary table as needed */
4987 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4988 if (ALLOC_DEBUG) {
4989 if (hfsmp->hfs_summary_table == NULL) {
4990 panic ("hfs_alloc_scan_range: no summary table!");
4991 }
4992 }
4993
4994 uint32_t temp_summary;
4995 error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4996 if (error) {
4997 if (ALLOC_DEBUG) {
4998 panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4999 }
5000 return EINVAL;
5001 }
5002
5003 if (ALLOC_DEBUG) {
5004 if (temp_summary < summary_bit) {
5005 panic ("hfs_alloc_scan_range: backwards summary bit?\n");
5006 }
5007 }
5008
5009 /*
5010 * If temp_summary is greater than summary_bit, then this
5011 * means that the next allocation block crosses a vcbVBMIOSize boundary
5012 * and we should treat this range of on-disk data as part of a new summary
5013 * bit.
5014 */
5015 if (temp_summary > summary_bit) {
5016 if (saw_free_blocks == 0) {
5017 /* Mark the bit as totally consumed in the summary table */
5018 hfs_set_summary (hfsmp, summary_bit, 1);
5019 }
5020 else {
5021 /* Mark the bit as potentially free in summary table */
5022 hfs_set_summary (hfsmp, summary_bit, 0);
5023 }
5024 last_marked = summary_bit;
5025 /*
5026 * Any time we set the summary table, update our counter which tracks
5027 * the last bit that was fully marked in the summary table.
5028 *
5029 * Then reset our marker which says we haven't seen a free bit yet.
5030 */
5031 saw_free_blocks = 0;
5032 summary_bit = temp_summary;
5033 }
5034 } /* End summary table conditions */
5035
5036 current_word = SWAP_BE32(buffer[word_index]);
5037 /* Iterate through the word 1 bit at a time... */
5038 for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
5039 if (curAllocBlock >= last_bitmap_block) {
5040 break;
5041 }
5042 u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
5043
5044 if (allocated) {
5045 if (size != 0) {
5046 if (readwrite) {
5047 /* Insert the previously tracked range of free blocks to the trim list */
5048 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
5049 }
5050 add_free_extent_cache (hfsmp, free_offset, size);
5051 size = 0;
5052 free_offset = 0;
5053 }
5054 }
5055 else {
5056 /* Not allocated */
5057 size++;
5058 if (free_offset == 0) {
5059 /* Start a new run of free space at curAllocBlock */
5060 free_offset = curAllocBlock;
5061 }
5062 if (saw_free_blocks == 0) {
5063 saw_free_blocks = 1;
5064 }
5065 }
5066 } /* end for loop iterating through the word */
5067
5068 if (curAllocBlock < last_bitmap_block) {
5069 word_index++;
5070 }
5071
5072 } /* End while loop (iterates through last_bitmap_block) */
5073
5074
5075 /*
5076 * We've (potentially) completed our pass through this region of bitmap,
5077 * but one thing we may not have done is updated that last summary bit for
5078 * the last page we scanned, because we would have never transitioned across
5079 * a vcbVBMIOSize boundary again. Check for that and update the last bit
5080 * as needed.
5081 *
5082 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
5083 * for the region of bitmap on-disk that we were scanning (it is one greater).
5084 */
5085 if ((curAllocBlock >= last_bitmap_block) &&
5086 (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
5087 uint32_t temp_summary;
5088 /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
5089 uint32_t temp_block = last_bitmap_block - 1;
5090 error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
5091 if (error) {
5092 if (ALLOC_DEBUG) {
5093 panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
5094 }
5095 return EINVAL;
5096 }
5097
5098 /* Did we already update this in the table? */
5099 if (temp_summary > last_marked) {
5100 if (saw_free_blocks == 0) {
5101 hfs_set_summary (hfsmp, temp_summary, 1);
5102 }
5103 else {
5104 hfs_set_summary (hfsmp, temp_summary, 0);
5105 }
5106 }
5107 }
5108
5109 /*
5110 * We may have been tracking a range of free blocks that hasn't been inserted yet.
5111 * Keep the logic for the TRIM and free extent separate from that of the summary
5112 * table management even though they are closely linked.
5113 */
5114 if (size != 0) {
5115 if (readwrite) {
5116 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
5117 }
5118 add_free_extent_cache (hfsmp, free_offset, size);
5119 }
5120
5121 /*
5122 * curAllocBlock represents the next block we need to scan when we return
5123 * to this function.
5124 */
5125 *bitToScan = curAllocBlock;
5126 ReleaseScanBitmapRange(blockRef);
5127
5128 return 0;
5129
5130 }
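
/*
 * Illustrative sketch (hypothetical helper, not used by the allocator): the
 * scan above relies on hfs_get_summary_index() to map an allocation block to
 * the summary bit covering its chunk of the bitmap. Assuming one summary bit
 * per vcbVBMIOSize bytes of bitmap, and one bitmap bit per allocation block,
 * the mapping reduces to a single division; the example_* name is invented
 * for illustration.
 */
static inline int example_summary_index(uint32_t alloc_block,
		uint32_t vbm_io_size,		/* bytes of bitmap per summary bit */
		uint32_t total_blocks, uint32_t *summary_bit)
{
	if (vbm_io_size == 0 || alloc_block >= total_blocks)
		return EINVAL;
	/* Each summary bit summarizes (vbm_io_size * 8) allocation blocks */
	*summary_bit = alloc_block / (vbm_io_size * kBitsPerByte);
	return 0;
}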
5131
5132
5133
5134 /*
5135 * Compute the maximum I/O size to generate against the bitmap file.
5136 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
5137 *
5138 * Inputs:
5139 * hfsmp -- hfsmount to look at
5140 * bitmap_st -- bit offset into the bitmap file
5141 *
5142 * Outputs:
5143 * iosize -- iosize to generate.
5144 *
5145 * Returns:
5146 * 0 on success; EINVAL otherwise
5147 */
5148 static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
5149
5150 /*
5151 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
5152 * with 32 bit math in this function.
5153 */
5154
5155 uint32_t bitmap_len;
5156 uint32_t remaining_bitmap;
5157 uint32_t target_iosize;
5158 uint32_t bitmap_off;
5159
5160 /* Is this bit index not word aligned? If so, immediately fail. */
5161 if (bitmap_st % kBitsPerWord) {
5162 if (ALLOC_DEBUG) {
5163 panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
5164 }
5165 return EINVAL;
5166 }
5167
5168 /* bitmap_off is in bytes, not allocation blocks/bits */
5169 bitmap_off = bitmap_st / kBitsPerByte;
5170
5171 if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
5172 if (ALLOC_DEBUG) {
5173 panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
5174 }
5175 return EINVAL;
5176 }
5177
5178 /*
5179 * Also invalid if it's not at least aligned to HFS bitmap logical
5180 * block boundaries. We don't have to emit an iosize that's an
5181 * exact multiple of the VBMIOSize, but it must start on such
5182 * a boundary.
5183 *
5184 * The vcbVBMIOSize may be SMALLER than the allocation block size
5185 * on a FS with giant allocation blocks, but it will never be
5186 * greater than it, so it should be safe to start I/O
5187 * aligned on a VBMIOsize boundary.
5188 */
5189 if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
5190 if (ALLOC_DEBUG) {
5191 panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
5192 }
5193 return EINVAL;
5194 }
5195
5196 /*
5197 * Generate the total bitmap file length in bytes, then round up
5198 * that value to the end of the last allocation block, if needed (it
5199 * will probably be needed). We won't scan past the last actual
5200 * allocation block.
5201 *
5202 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
5203 * have to complete the I/O on VBMIOSize boundaries, but we can only read
5204 * up until the end of the bitmap file.
5205 */
5206 bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
5207
5208 remaining_bitmap = bitmap_len - bitmap_off;
5209
5210 /*
5211 * io size is the MIN of the maximum I/O we can generate or the
5212 * remaining amount of bitmap.
5213 */
5214 target_iosize = MIN((MAXBSIZE), remaining_bitmap);
5215 *iosize = target_iosize;
5216
5217 return 0;
5218 }
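
/*
 * Worked example: on a volume with totalBlocks = 1000000 and 4 KB allocation
 * blocks (so one allocation block of bitmap covers blockSize * 8 = 32768
 * blocks), the math above gives
 *
 *	bitmap_len = roundup(1000000, 32768) / 8 = 1015808 / 8 = 126976 bytes
 *
 * so for bitmap_st = 0 (bitmap_off = 0), remaining_bitmap is 126976 and the
 * emitted iosize is MIN(MAXBSIZE, 126976) = 126976: the whole bitmap in one
 * I/O, since the file is smaller than the (typically 1 MB) MAXBSIZE cap.
 */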
5219
5220
5221
5222
5223 /*
5224 * This function is basically the same as hfs_isallocated, except it's designed for
5225 * use with the red-black tree validation code. It assumes we're only checking whether
5226 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
5227 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
5228 *
5229 * This should not be called in general purpose scanning code.
5230 */
5231 int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
5232
5233 u_int32_t *currentWord; // Pointer to current word within bitmap block
5234 u_int32_t bitMask; // Word with given bits already set (ready to test)
5235 u_int32_t firstBit; // Bit index within word of first bit to allocate
5236 u_int32_t numBits; // Number of bits in word to allocate
5237 u_int32_t bitsPerBlock;
5238 uintptr_t blockRef = 0;
5239 u_int32_t wordsPerBlock;
5240 u_int32_t numBlocks = 1;
5241 u_int32_t *buffer = NULL;
5242
5243 int inuse = 0;
5244 int error;
5245
5246
5247 if (bp_buf) {
5248 /* Just use the passed-in buffer if available. */
5249 buffer = bp_buf;
5250 }
5251 else {
5252 /*
5253 * Pre-read the bitmap block containing the first word of allocation
5254 */
5255 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
5256 HFS_ALLOC_IGNORE_TENTATIVE);
5257 if (error)
5258 return (error);
5259 }
5260
5261 /*
5262 * Initialize currentWord to point at the word containing startingBlock.
5263 */
5264 u_int32_t wordIndexInBlock;
5265
5266 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
5267 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
5268
5269 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
5270 currentWord = buffer + wordIndexInBlock;
5271
5272 /*
5273 * First test any non word aligned bits.
5274 */
5275 firstBit = startingBlock % kBitsPerWord;
5276 bitMask = kAllBitsSetInWord >> firstBit;
5277 numBits = kBitsPerWord - firstBit;
5278 if (numBits > numBlocks) {
5279 numBits = numBlocks;
5280 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
5281 }
5282 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
5283 inuse = 1;
5284 goto Exit;
5285 }
5286 numBlocks -= numBits;
5287 ++currentWord;
5288
5289 Exit:
5290 if(bp_buf == NULL) {
5291 if (buffer) {
5292 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
5293 }
5294 }
5295 return (inuse);
5299 }
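
/*
 * Illustrative sketch (hypothetical helper): the single-bit test above boils
 * down to indexing the big-endian bitmap word and testing one bit from the
 * high end. The example_* name is invented for illustration.
 */
static inline int example_block_is_allocated(const u_int32_t *bitmap_block,
		u_int32_t bit_within_block)
{
	/* On-disk bitmap words are big-endian; bit 0 is the high bit of word 0 */
	u_int32_t word = SWAP_BE32(bitmap_block[bit_within_block / kBitsPerWord]);
	return ((word & (kHighBitInWordMask >> (bit_within_block % kBitsPerWord))) != 0);
}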
5300
5301 /*
5302 * This function resets all of the data structures relevant to the
5303 * free extent cache stored in the hfsmount struct.
5304 *
5305 * If we are using the red-black tree code then we need to account for the fact that
5306 * we may encounter situations where we need to jettison the tree. If that is the
5307 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
5308 * the free ext cache is zeroed before we start using it.
5309 *
5310 * We also reset and disable the cache when allocLimit is updated... which
5311 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
5312 * It is independent of the type of allocator being used currently.
5313 */
5314 void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
5315 {
5316 int bytes;
5317 void *freeExt;
5318
5319 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5320 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
5321
5322 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5323
5324 /* reset Free Extent Count */
5325 hfsmp->vcbFreeExtCnt = 0;
5326
5327 /* reset the actual array */
5328 bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
5329 freeExt = (void*)(hfsmp->vcbFreeExt);
5330
5331 bzero (freeExt, bytes);
5332
5333 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5334
5335 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5336 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
5337
5338 return;
5339 }
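
/*
 * For reference, the cache being reset above is conceptually just a small
 * fixed-size array plus a count, guarded by the vcbFreeExtLock spinlock:
 *
 *	u_int32_t               vcbFreeExtCnt;			// valid entries
 *	HFSPlusExtentDescriptor vcbFreeExt[kMaxFreeExtents];	// {startBlock, blockCount}
 *
 * so a reset is simply count = 0 plus a bzero of the array, exactly as done
 * here under the spinlock.
 */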
5340
5341 /*
5342 * This function is used to inform the allocator if we have to effectively shrink
5343 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
5344 *
5345 * The bitmap lock must be held when calling this function. This function also modifies the
5346 * allocLimit field in the hfs mount point structure in the general case.
5347 *
5348 * In the shrinking case, we'll have to remove all free extents from the red-black
5349 * tree past the specified offset new_end_block. In the growth case, we'll have to force
5350 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
5351 *
5352 * new_end_block represents the total number of blocks available for allocation in the resized
5353 * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
5354 * will be outside the [0, n-1] range that is indexable in the bitmap.
5355 *
5356 * Returns 0 on success
5357 * errno on failure
5358 */
5359 __private_extern__
5360 u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
5361
5362 /*
5363 * Update allocLimit to the argument specified
5364 */
5365 hfsmp->allocLimit = new_end_block;
5366
5367 /* Invalidate the free extent cache completely so that
5368 * it does not have any extents beyond end of current
5369 * volume.
5370 */
5371 ResetVCBFreeExtCache(hfsmp);
5372
5373 /* Force a rebuild of the summary table. */
5374 (void) hfs_rebuild_summary (hfsmp);
5375
5376 // Delete any tentative ranges that are in the area we're shrinking
5377 struct rl_entry *range, *next_range;
5378 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
5379 rl_link, next_range) {
5380 if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP)
5381 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
5382 }
5383
5384 return 0;
5385 }
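
/*
 * Usage sketch (hypothetical caller): a shrink in the style of
 * hfs_truncatefs() would hold the bitmap lock and move the limit down:
 *
 *	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 *	error = UpdateAllocLimit(hfsmp, newBlockCount);
 *	hfs_systemfile_unlock(hfsmp, lockflags);
 *
 * where newBlockCount is the post-resize total, so blocks numbered
 * newBlockCount and beyond are no longer allocatable. A grow passes the
 * larger count once the new range of bitmap has been initialized.
 */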
5386
5387 /*
5388 * Remove an extent from the list of free extents.
5389 *
5390 * This is a low-level routine. It does not handle overlaps or splitting;
5391 * that is the responsibility of the caller. The input extent must exactly
5392 * match an extent already in the list; it will be removed, and any following
5393 * extents in the list will be shifted up.
5394 *
5395 * Inputs:
5396 * hfsmp - mount point structure
5397 * index - index of the extent to remove
5398 *
5399 * Result:
5400 * None.
5401 */
5402 static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
5403 {
5404 if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
5405 if (ALLOC_DEBUG)
5406 panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
5407 else
5408 printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
5409 return;
5410 }
5411 int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
5412 if (shift_count > 0) {
5413 memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
5414 }
5415 hfsmp->vcbFreeExtCnt--;
5416 }
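
/*
 * Worked example: with vcbFreeExtCnt == 4 and extents [A, B, C, D],
 * remove_free_extent_list(hfsmp, 1) computes shift_count = 4 - 1 - 1 = 2,
 * memmoves C and D down one slot, and decrements the count, leaving
 * [A, C, D].
 */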
5417
5418
5419 /*
5420 * Add an extent to the list of free extents.
5421 *
5422 * This is a low-level routine. It does not handle overlaps or coalescing;
5423 * that is the responsibility of the caller. This routine *does* make
5424 * sure that the extent it is adding is inserted in the correct location.
5425 * If the list is full, this routine will handle either removing the last
5426 * extent in the list to make room for the new extent, or ignoring the
5427 * new extent if it is "worse" than the last extent in the list.
5428 *
5429 * Inputs:
5430 * startBlock - Start of extent to add
5431 * blockCount - Number of blocks in extent to add
5432 *
5433 * Result:
5434 * The index where the extent was inserted, or kMaxFreeExtents
5435 * if the extent was not inserted (the list was full, and the extent
5436 * being added was "worse" than everything in the list).
5437 */
5438 static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5439 {
5440 uint32_t i;
5441
5442 /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
5443 if (ALLOC_DEBUG) {
5444 uint32_t endBlock = startBlock + blockCount;
5445 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5446 if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
5447 startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
5448 continue;
5449 }
5450 panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
5451 hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
5452 }
5453 }
5454
5455 /* Figure out what index the new extent should be inserted at. */
5456 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5457 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5458 /* The list is sorted by increasing offset. */
5459 if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
5460 break;
5461 }
5462 } else {
5463 /* The list is sorted by decreasing size. */
5464 if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
5465 break;
5466 }
5467 }
5468 }
5469
5470 /* When we get here, i is the index where the extent should be inserted. */
5471 if (i == kMaxFreeExtents) {
5472 /*
5473 * The new extent is worse than anything already in the list,
5474 * and the list is full, so just ignore the extent to be added.
5475 */
5476 return i;
5477 }
5478
5479 /*
5480 * Grow the list (if possible) to make room for an insert.
5481 */
5482 if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
5483 hfsmp->vcbFreeExtCnt++;
5484
5485 /*
5486 * If we'll be keeping any extents after the insert position, then shift them.
5487 */
5488 int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
5489 if (shift_count > 0) {
5490 memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
5491 }
5492
5493 /* Finally, store the new extent at its correct position. */
5494 hfsmp->vcbFreeExt[i].startBlock = startBlock;
5495 hfsmp->vcbFreeExt[i].blockCount = blockCount;
5496 return i;
5497 }
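
/*
 * Worked example: on a non-sparse volume the list is kept in descending
 * blockCount order. With the cache holding [(500,40), (100,20), (300,10)],
 * add_free_extent_list(hfsmp, 700, 25) stops its search at i == 1 (since
 * 25 > 20), shifts (100,20) and (300,10) down one slot, stores the new
 * extent, and returns 1, leaving [(500,40), (700,25), (100,20), (300,10)].
 */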
5498
5499
5500 /*
5501 * Remove an entry from free extent cache after it has been allocated.
5502 *
5503 * This is a high-level routine. It handles removing a portion of a
5504 * cached extent, potentially splitting it into two (if the cache was
5505 * already full, throwing away the extent that would sort last). It
5506 * also handles removing an extent that overlaps multiple extents in
5507 * the cache.
5508 *
5509 * Inputs:
5510 * hfsmp - mount point structure
5511 * startBlock - starting block of the extent to be removed.
5512 * blockCount - number of blocks of the extent to be removed.
5513 */
5514 static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5515 {
5516 u_int32_t i, insertedIndex;
5517 u_int32_t currentStart, currentEnd, endBlock;
5518 int extentsRemoved = 0;
5519
5520 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5521 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
5522
5523 endBlock = startBlock + blockCount;
5524
5525 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5526
5527 /*
5528 * Iterate over all of the extents in the free extent cache, removing or
5529 * updating any entries that overlap with the input extent.
5530 */
5531 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
5532 currentStart = hfsmp->vcbFreeExt[i].startBlock;
5533 currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
5534
5535 /*
5536 * If the current extent is entirely before or entirely after the
5537 * extent to be removed, then we keep it as-is.
5538 */
5539 if (currentEnd <= startBlock || currentStart >= endBlock) {
5540 continue;
5541 }
5542
5543 /*
5544 * If the extent being removed entirely contains the current extent,
5545 * then remove the current extent.
5546 */
5547 if (startBlock <= currentStart && endBlock >= currentEnd) {
5548 remove_free_extent_list(hfsmp, i);
5549
5550 /*
5551 * We just removed the extent at index i. The extent at
5552 * index i+1 just got shifted to index i. So decrement i
5553 * to undo the loop's "++i", and the next iteration will
5554 * examine index i again, which contains the next extent
5555 * in the list.
5556 */
5557 --i;
5558 ++extentsRemoved;
5559 continue;
5560 }
5561
5562 /*
5563 * If the extent being removed is strictly "in the middle" of the
5564 * current extent, then we need to split the current extent into
5565 * two discontiguous extents (the "head" and "tail"). The good
5566 * news is that we don't need to examine any other extents in
5567 * the list.
5568 */
5569 if (startBlock > currentStart && endBlock < currentEnd) {
5570 remove_free_extent_list(hfsmp, i);
5571 add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
5572 add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
5573 break;
5574 }
5575
5576 /*
5577 * The only remaining possibility is that the extent to be removed
5578 * overlaps the start or end (but not both!) of the current extent.
5579 * So we need to replace the current extent with a shorter one.
5580 *
5581 * The only tricky part is that the updated extent might be at a
5582 * different index than the original extent. If the updated extent
5583 * was inserted after the current extent, then we need to re-examine
5584 * the entry at index i, since it now contains the extent that was
5585 * previously at index i+1. If the updated extent was inserted
5586 * before or at the same index as the removed extent, then the
5587 * following extents haven't changed position.
5588 */
5589 remove_free_extent_list(hfsmp, i);
5590 if (startBlock > currentStart) {
5591 /* Remove the tail of the current extent. */
5592 insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
5593 } else {
5594 /* Remove the head of the current extent. */
5595 insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
5596 }
5597 if (insertedIndex > i) {
5598 --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */
5599 }
5600 }
5601
5602 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5603
5604 sanity_check_free_ext(hfsmp, 0);
5605
5606 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5607 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
5608
5609 return;
5610 }
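
/*
 * Worked example: if the cache holds the single extent (100, 100), i.e.
 * blocks 100..199 free, then remove_free_extent_cache(hfsmp, 130, 10) takes
 * the "middle" case above: it removes (100, 100) and re-adds the head
 * (100, 30) and the tail (140, 60). Had the cache been full, the worse
 * sorting of the two pieces would simply fall off the end of the list.
 */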
5611
5612
5613 /*
5614 * Add an entry to free extent cache after it has been deallocated.
5615 *
5616 * This is a high-level routine. It will merge overlapping or contiguous
5617 * extents into a single, larger extent.
5618 *
5619 * If the extent provided has blocks beyond current allocLimit, it is
5620 * clipped to allocLimit (so that we won't accidentally find and allocate
5621 * space beyond allocLimit).
5622 *
5623 * Inputs:
5624 * hfsmp - mount point structure
5625 * startBlock - starting block of the extent to be added.
5626 * blockCount - number of blocks of the extent to be added.
5627 *
5628 * Returns:
5629 * true - if the extent was added successfully to the list
5630 * false - if the extent was not added to the list, maybe because
5631 * the extent was beyond allocLimit, or was not a good
5632 * enough candidate to be kept in the cache.
5633 */
5634 static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
5635 {
5636 Boolean retval = false;
5637 uint32_t endBlock;
5638 uint32_t currentEnd;
5639 uint32_t i;
5640
5641 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5642 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
5643
5644 #if DEBUG
5645 for (i = 0; i < 2; ++i) {
5646 struct rl_entry *range;
5647 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
5648 assert(rl_overlap(range, startBlock,
5649 startBlock + blockCount - 1) == RL_NOOVERLAP);
5650 }
5651 }
5652 #endif
5653
5654 /* No need to add extent that is beyond current allocLimit */
5655 if (startBlock >= hfsmp->allocLimit) {
5656 goto out_not_locked;
5657 }
5658
5659 /* If end of the free extent is beyond current allocLimit, clip the extent */
5660 if ((startBlock + blockCount) > hfsmp->allocLimit) {
5661 blockCount = hfsmp->allocLimit - startBlock;
5662 }
5663
5664 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5665
5666 /*
5667 * Make a pass through the free extent cache, looking for known extents that
5668 * overlap or are contiguous with the extent to be added. We'll remove those
5669 * extents from the cache, and incorporate them into the new extent to be added.
5670 */
5671 endBlock = startBlock + blockCount;
5672 for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
5673 currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
5674 if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
5675 /* Extent i does not overlap and is not contiguous, so keep it. */
5676 continue;
5677 } else {
5678 /* We need to remove extent i and combine it with the input extent. */
5679 if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
5680 startBlock = hfsmp->vcbFreeExt[i].startBlock;
5681 if (currentEnd > endBlock)
5682 endBlock = currentEnd;
5683
5684 remove_free_extent_list(hfsmp, i);
5685 /*
5686 * We just removed the extent at index i. The extent at
5687 * index i+1 just got shifted to index i. So decrement i
5688 * to undo the loop's "++i", and the next iteration will
5689 * examine index i again, which contains the next extent
5690 * in the list.
5691 */
5692 --i;
5693 }
5694 }
5695 retval = (add_free_extent_list(hfsmp, startBlock, endBlock - startBlock) < kMaxFreeExtents); /* true if the extent was kept */
5696
5697 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5698
5699 out_not_locked:
5700 sanity_check_free_ext(hfsmp, 0);
5701
5702 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5703 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
5704
5705 return retval;
5706 }
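
/*
 * Worked example: with the cache holding (100, 20) and (140, 10), freeing
 * blocks 120..139 via add_free_extent_cache(hfsmp, 120, 20) absorbs both
 * neighbors: each overlapping or contiguous entry widens the merged range
 * and is removed from the list, and the single extent (100, 50) is
 * re-inserted in their place.
 */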
5707
5708 /* Debug function to check if the free extent cache is good or not */
5709 static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
5710 {
5711 u_int32_t i, j;
5712
5713 /* Do not do anything if debug is not on */
5714 if (ALLOC_DEBUG == 0) {
5715 return;
5716 }
5717
5718 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5719
5720 if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
5721 panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
5722
5723 /*
5724 * Iterate the Free extent cache and ensure no entries are bogus or refer to
5725 * allocated blocks.
5726 */
5727 for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
5728 u_int32_t start, nblocks;
5729
5730 start = hfsmp->vcbFreeExt[i].startBlock;
5731 nblocks = hfsmp->vcbFreeExt[i].blockCount;
5732
5733 /* Check if any of the blocks in free extent cache are allocated.
5734 * This should not be enabled always because it might take
5735 * very long for large extents that get added to the list.
5736 *
5737 * We have to drop vcbFreeExtLock while we call hfs_isallocated
5738 * because it is going to do I/O. Note that the free extent
5739 * cache could change. That's a risk we take when using this
5740 * debugging code. (Another alternative would be to try to
5741 * detect when the free extent cache changed, and perhaps
5742 * restart if the list changed while we dropped the lock.)
5743 */
5744 if (check_allocated) {
5745 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5746 if (hfs_isallocated(hfsmp, start, nblocks)) {
5747 panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
5748 hfsmp, i, start, nblocks);
5749 }
5750 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5751 }
5752
5753 /* Check if any part of the extent is beyond allocLimit */
5754 if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
5755 panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
5756 hfsmp, i, start, nblocks, hfsmp->allocLimit);
5757 }
5758
5759 /* Check if there are any duplicate start blocks */
5760 for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
5761 if (start == hfsmp->vcbFreeExt[j].startBlock) {
5762 panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
5763 hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
5764 hfsmp->vcbFreeExt[j].blockCount);
5765 }
5766 }
5767
5768 /* Check if the entries are out of order */
5769 if ((i+1) != hfsmp->vcbFreeExtCnt) {
5770 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5771 /* sparse devices are sorted by starting block number (ascending) */
5772 if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
5773 panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5774 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5775 hfsmp->vcbFreeExt[i+1].blockCount);
5776 }
5777 } else {
5778 /* normally sorted by block count (descending) */
5779 if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
5780 panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
5781 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5782 hfsmp->vcbFreeExt[i+1].blockCount);
5783 }
5784 }
5785 }
5786 }
5787 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5788 }
5789
5790 #define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit))
5791 #define kHighBitInDoubleWordMask 0x8000000000000000ull
5792
5793 static int clzll(uint64_t x)
5794 {
5795 if (x == 0)
5796 return 64;
5797 else
5798 return __builtin_clzll(x);
5799 }
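
/*
 * For reference, clzll() counts leading zero bits in a 64-bit word, with the
 * zero input (undefined for __builtin_clzll) pinned to 64. Sample values the
 * bit-run counters below depend on:
 *
 *	clzll(0)			== 64	// no set bit at all
 *	clzll(1)			== 63	// only the lowest bit set
 *	clzll(kHighBitInDoubleWordMask)	== 0	// only the highest bit set
 *	BIT_RIGHT_MASK(4)		== 0x0fffffffffffffffull
 *
 * so clzll(x) is exactly the bit index, counted from the MSB, of the first
 * set bit, which is also the length of the leading run of clear bits.
 */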
5800
5801 #if !HFS_ALLOC_TEST
5802
5803 static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
5804 {
5805 uint32_t start_bit;
5806 uint32_t iosize = 0;
5807 uint32_t byte_offset;
5808 uint32_t last_bitmap_block;
5809 int error;
5810 struct hfsmount *hfsmp = bitmap_ctx->hfsmp;
5811 #if !HFS_ALLOC_TEST
5812 uint64_t lock_elapsed;
5813 #endif
5814
5815
5816 if (bitmap_ctx->bp)
5817 ReleaseScanBitmapRange(bitmap_ctx->bp);
5818
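	/*
	 * Check for pending signals (PCATCH): msleep() returns EINTR if the
	 * calling thread was interrupted, letting a long scan be aborted.
	 */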
5819 if (msleep(NULL, NULL, PINOD | PCATCH,
5820 "hfs_fsinfo", NULL) == EINTR) {
5821 return EINTR;
5822 }
5823
5824 #if !HFS_ALLOC_TEST
5825 /*
5826 * Let someone else use the allocation map once we've held the lock for longer than HFS_FSINFO_MAX_LOCKHELD_TIME.
5827 * lock_start is initialized in hfs_find_free_extents().
5828 */
5829 absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed);
5830
5831 if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) {
5832
5833 hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags);
5834
5835 /* add tsleep here to force context switch and fairness */
5836 tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1);
5837
5838 hfs_journal_lock(hfsmp);
5839
5840 /* Flush the journal and wait for all I/Os to finish up */
5841 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
5842 if (error) {
5843 hfs_journal_unlock(hfsmp);
5844 return error;
5845 }
5846
5847 /*
5848 * Take bitmap lock to ensure it is not being modified while journal is still held.
5849 * Since we are reading larger than normal blocks from the bitmap, which
5850 * might confuse other parts of the bitmap code using normal blocks, we
5851 * take exclusive lock here.
5852 */
5853 bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5854
5855 bitmap_ctx->lock_start = mach_absolute_time();
5856
5857 /* Release the journal lock */
5858 hfs_journal_unlock(hfsmp);
5859
5860 /*
5861 * Bitmap is read in large block size (up to 1MB),
5862 * unlike the runtime which reads the bitmap in the
5863 * 4K block size. If the bitmap is read by both ways
5864 * at the same time, it can result in multiple buf_t with
5865 * different sizes and potentially cause data corruption.
5866 * To avoid this, we invalidate all the existing buffers
5867 * associated with the bitmap vnode.
5868 */
5869 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
5870 if (error) {
5871 /* hfs_systemfile_unlock will be called in the caller */
5872 return error;
5873 }
5874 }
5875 #endif
5876
5877 start_bit = bitmap_ctx->run_offset;
5878
5879 if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) {
5880 bitmap_ctx->chunk_end = 0;
5881 bitmap_ctx->bp = NULL;
5882 bitmap_ctx->bitmap = NULL;
5883 return 0;
5884 }
5885
5886 assert(start_bit % 8 == 0);
5887
5888 /*
5889 * Compute how much I/O we should generate here.
5890 * hfs_scan_range_size will validate that the start bit
5891 * converted into a byte offset into the bitmap file,
5892 * is aligned on a VBMIOSize boundary.
5893 */
5894 error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize);
5895 if (error)
5896 return error;
5897
5898 assert(iosize != 0);
5899
5900 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
5901 byte_offset = start_bit / kBitsPerByte;
5902
5903 /*
5904 * When the journal replays blocks, it does so by writing directly to the disk
5905 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
5906 * it also immediately re-reads and invalidates the range covered by the bp so
5907 * it does not leave anything lingering in the cache (for iosize reasons).
5908 *
5909 * As such, it is safe to do large I/Os here with ReadBitmapRange.
5910 *
5911 * NOTE: It is not recommended, but it is possible to call the function below
5912 * on sections of the bitmap that may be in core already as long as the pages are not
5913 * dirty. In that case, we'd notice that something starting at that
5914 * logical block of the bitmap exists in the metadata cache, and we'd check
5915 * if the iosize requested is the same as what was already allocated for it.
5916 * Odds are pretty good we're going to request something larger. In that case,
5917 * we just free the existing memory associated with the buf and reallocate a
5918 * larger range. This function should immediately invalidate it as soon as we're
5919 * done scanning, so this shouldn't cause any coherency issues.
5920 */
5921 error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp);
5922 if (error)
5923 return error;
5924
5925 /*
5926 * At this point, we have a giant wired buffer that represents some portion of
5927 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
5928 * bytes though, so clip our ending bit to what we actually read in.
5929 */
5930 last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte;
5931
5932 /* Cap the last block to the total number of blocks if required */
5933 if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks)
5934 last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks;
5935
5936 bitmap_ctx->chunk_current = 0; // new chunk of bitmap
5937 bitmap_ctx->chunk_end = last_bitmap_block - start_bit;
5938
5939 return 0;
5940 }
5941
5942 #endif // !HFS_ALLOC_TEST
5943
5944 // Returns the length of the run of set bits starting at 'start' ('end' is exclusive)
5945 static int bit_count_set(void *bitmap, int start, int end)
5946 {
5947 if (start == end)
5948 return 0;
5949
5950 assert(end > start);
5951
5952 const int start_bit = start & 63;
5953 const int end_bit = end & 63;
5954
5955 uint64_t *p = (uint64_t *)bitmap + start / 64;
5956 uint64_t x = ~OSSwapBigToHostInt64(*p);
5957
5958 if ((start & ~63) == (end & ~63)) {
5959 // Start and end in same 64 bits
5960 x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
5961 return clzll(x) - start_bit;
5962 }
5963
5964 // Deal with initial unaligned bit
5965 x &= BIT_RIGHT_MASK(start_bit);
5966
5967 if (x)
5968 return clzll(x) - start_bit;
5969
5970 // Go fast
5971 ++p;
5972 int count = 64 - start_bit;
5973 int nquads = (end - end_bit - start - 1) / 64;
5974
5975 while (nquads--) {
5976 if (*p != 0xffffffffffffffffull) {
5977 x = ~OSSwapBigToHostInt64(*p);
5978 return count + clzll(x);
5979 }
5980 ++p;
5981 count += 64;
5982 }
5983
5984 if (end_bit) {
5985 x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
5986 count += clzll(x);
5987 }
5988
5989 return count;
5990 }
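
/*
 * Usage sketch (hypothetical, test-style): counting a run of set bits that
 * starts mid-word. The on-disk bitmap is big-endian, so the sample data is
 * byte-swapped into place; the example_* name is invented.
 */
static inline int example_bit_count_set_usage(void)
{
	/* Bits 0..63 set, bits 64..127 clear (bit 0 is the MSB of word 0) */
	uint64_t bits[2] = { OSSwapHostToBigInt64(~0ull), 0 };
	/* The run starting at bit 4 covers bits 4..63, i.e. 60 bits */
	return bit_count_set(bits, 4, 128);	/* returns 60 */
}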
5991
5992 /* Returns the length of the run of cleared bits starting at 'start':
5993 * bitmap is a single chunk of memory being examined
5994 * start: the start bit relative to the current buffer to be examined; start is inclusive.
5995 * end: the end bit relative to the current buffer to be examined; end is not inclusive.
5996 */
5997 static int bit_count_clr(void *bitmap, int start, int end)
5998 {
5999 if (start == end)
6000 return 0;
6001
6002 assert(end > start);
6003
6004 const int start_bit = start & 63;
6005 const int end_bit = end & 63;
6006
6007 uint64_t *p = (uint64_t *)bitmap + start / 64;
6008 uint64_t x = OSSwapBigToHostInt64(*p);
6009
6010 if ((start & ~63) == (end & ~63)) {
6011 // Start and end in same 64 bits
6012 x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
6013
6014 return clzll(x) - start_bit;
6015 }
6016
6017 // Deal with initial unaligned bit
6018 x &= BIT_RIGHT_MASK(start_bit);
6019
6020 if (x)
6021 return clzll(x) - start_bit;
6022
6023 // Go fast
6024 ++p;
6025 int count = 64 - start_bit;
6026 int nquads = (end - end_bit - start - 1) / 64;
6027
6028 while (nquads--) {
6029 if (*p) {
6030 x = OSSwapBigToHostInt64(*p);
6031 return count + clzll(x);
6032 }
6033 ++p;
6034 count += 64;
6035 }
6036
6037 if (end_bit) {
6038 x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
6039
6040 count += clzll(x);
6041 }
6042
6043 return count;
6044 }
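
/*
 * Usage sketch (hypothetical, test-style): the mirror image of the example
 * after bit_count_set(). With the first 64 bits clear and the next 64 set,
 * the clear run measured from bit 0 ends at the word boundary.
 */
static inline int example_bit_count_clr_usage(void)
{
	uint64_t bits[2] = { 0, OSSwapHostToBigInt64(~0ull) };
	return bit_count_clr(bits, 0, 128);	/* returns 64 */
}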
6045
6046 #if !HFS_ALLOC_TEST
6047 static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set)
6048 {
6049 uint32_t end, start_summary_bit, end_summary_bit;
6050 errno_t error = 0;
6051
6052 if (count == 0)
6053 goto out;
6054
6055 if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE))
6056 return 0;
6057
6058 if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) {
6059 error = EINVAL;
6060 goto out;
6061 }
6062
6063 end = start + count - 1;
6064 if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) {
6065 error = EINVAL;
6066 goto out;
6067 }
6068
6069 // If this summary bit was already marked free by the preceding clear-bit run, leave it alone.
6070 if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set)
6071 start_summary_bit++;
6072
6073 for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++)
6074 hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set);
6075
6076 if (!set)
6077 bitmap_ctx->last_free_summary_bit = end_summary_bit;
6078
6079 out:
6080 return error;
6081
6082 }
6083 #endif //!HFS_ALLOC_TEST
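
/*
 * Worked example: suppose each summary bit covers 32768 allocation blocks
 * and a run of set (allocated) bits spans blocks 30000..40000. Then
 * start_summary_bit == 0 and end_summary_bit == 1. If the preceding clear
 * run already marked summary bit 0 as having free blocks, the skip above
 * bumps start_summary_bit to 1 so that marking is not overwritten, and only
 * summary bit 1 is set.
 */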
6084
6085 /*
6086 * Read in chunks of the bitmap into memory, and find a run of cleared/set bits;
6087 * the run can extend across chunk boundaries.
6088 * bit_count_clr can be passed to get a run of cleared bits.
6089 * bit_count_set can be passed to get a run of set bits.
6090 */
6091 static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count)
6092 {
6093 int count;
6094 errno_t error = 0;
6095
6096 *bit_count = 0;
6097
6098 do {
6099 if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) {
6100 if ((error = get_more_bits(bitmap_ctx)) != 0)
6101 goto out;
6102 }
6103
6104 if (bitmap_ctx->chunk_end == 0)
6105 break;
6106
6107 count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end);
6108
6109 bitmap_ctx->run_offset += count;
6110 bitmap_ctx->chunk_current += count;
6111 *bit_count += count;
6112
6113 } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count);
6114
6115 out:
6116 return error;
6117
6118 }
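
/*
 * Worked trace: suppose a clear run spans a chunk boundary, covering the
 * last 100 bits of one chunk and the first 50 bits of the next. The first
 * fn() call returns 100 and advances chunk_current to chunk_end; since
 * count is non-zero the loop repeats, get_more_bits() maps the next chunk,
 * and the second fn() call returns 50. The run ends inside the new chunk,
 * the while condition fails, and *bit_count is 150.
 */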
6119
6120 // Returns count of number of bits clear
6121 static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count)
6122 {
6123 return hfs_bit_count(bitmap_ctx, bit_count_clr, count);
6124 }
6125
6126 // Returns count of number of bits set
6127 static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count)
6128 {
6129 return hfs_bit_count(bitmap_ctx, bit_count_set, count);
6130 }
6131
6132 static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx)
6133 {
6134 return bitmap_ctx->run_offset;
6135 }
6136
6137 /*
6138 * Perform a full scan of the bitmap file.
6139 * Note: during the scan of the bitmap file, this function may drop and
6140 * reacquire the bitmap lock to let someone else use the bitmap, for fairness.
6141 * Currently it is used for HFS_GET_FSINFO statistics gathering, which
6142 * is run while other processes might perform HFS operations.
6143 */
6144
6145 errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
6146 void (*callback)(void *data, off_t free_extent_size), void *callback_arg)
6147 {
6148 struct bitmap_context bitmap_ctx;
6149 uint32_t count;
6150 errno_t error = 0;
6151
6152 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
6153 error = hfs_init_summary(hfsmp);
6154 if (error)
6155 return error;
6156 }
6157
6158 bzero(&bitmap_ctx, sizeof(struct bitmap_context));
6159
6160 /*
6161 * The journal maintains list of recently deallocated blocks to
6162 * issue DKIOCUNMAPs when the corresponding journal transaction is
6163 * flushed to the disk. To avoid any race conditions, we only
6164 * want one active trim list. Therefore we make sure that the
6165 * journal trim list is sync'ed, empty, and not modifiable for
6166 * the duration of our scan.
6167 *
6168 * Take the journal lock before flushing the journal to the disk.
6169 * We will keep on holding the journal lock till we don't get the
6170 * bitmap lock to make sure that no new journal transactions can
6171 * start. This will make sure that the journal trim list is not
6172 * modified after the journal flush and before getting bitmap lock.
6173 * We can release the journal lock after we acquire the bitmap
6174 * lock as it will prevent any further block deallocations.
6175 */
6176 hfs_journal_lock(hfsmp);
6177
6178 /* Flush the journal and wait for all I/Os to finish up */
6179 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
6180 if (error) {
6181 hfs_journal_unlock(hfsmp);
6182 return error;
6183 }
6184
6185 /*
6186 * Take bitmap lock to ensure it is not being modified.
6187 * Since we are reading larger than normal blocks from the bitmap, which
6188 * might confuse other parts of the bitmap code using normal blocks, we
6189 * take exclusive lock here.
6190 */
6191 bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6192
6193 #if !HFS_ALLOC_TEST
6194 bitmap_ctx.lock_start = mach_absolute_time();
6195 #endif
6196
6197 /* Release the journal lock */
6198 hfs_journal_unlock(hfsmp);
6199
6200 /*
6201 * Bitmap is read in large block size (up to 1MB),
6202 * unlike the runtime which reads the bitmap in the
6203 * 4K block size. If the bitmap is read by both ways
6204 * at the same time, it can result in multiple buf_t with
6205 * different sizes and potentially cause data corruption.
6206 * To avoid this, we invalidate all the existing buffers
6207 * associated with the bitmap vnode.
6208 */
6209 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
6210 if (error)
6211 goto out;
6212
6213 /*
6214 * Get the list of all free extent ranges. The loop below walks the
6215 * bitmap and invokes the caller's callback for each run of free
6216 * blocks found during the scan.
6217 */
6218 bitmap_ctx.hfsmp = hfsmp;
6219 bitmap_ctx.run_offset = 0;
6220
6221 while (bitmap_ctx.run_offset < hfsmp->totalBlocks) {
6222
6223 uint32_t start = hfs_bit_offset(&bitmap_ctx);
6224
6225 if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0)
6226 goto out;
6227
6228 if (count)
6229 callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize));
6230
6231 if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0)
6232 goto out;
6233
6234 start = hfs_bit_offset(&bitmap_ctx);
6235
6236 if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0)
6237 goto out;
6238
6239 if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0)
6240 goto out;
6241 }
6242
6243 out:
6244 if (bitmap_ctx.lockflags) {
6245 hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags);
6246 }
6247
6248 return error;
6249 }
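
/*
 * Usage sketch (hypothetical caller): the callback receives each free
 * extent's size in bytes, so a caller can total or histogram free space,
 * much as the HFS_GET_FSINFO path does. The example_* names are invented.
 */
static void example_free_extent_cb(void *data, off_t free_extent_size)
{
	uint64_t *total_free = data;		/* bytes of free space seen so far */
	*total_free += free_extent_size;
}

static errno_t example_total_free_bytes(struct hfsmount *hfsmp, uint64_t *total)
{
	*total = 0;
	return hfs_find_free_extents(hfsmp, example_free_extent_cb, total);
}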
6250