]> git.saurik.com Git - apple/hfs.git/blame_incremental - livefiles_hfs_plugin/lf_hfs_volume_allocation.c
hfs-556.100.11.tar.gz
[apple/hfs.git] / livefiles_hfs_plugin / lf_hfs_volume_allocation.c
... / ...
CommitLineData
1/* Copyright © 2017-2018 Apple Inc. All rights reserved.
2*
3* lf_hfs_volume_allocation.c
4* livefiles_hfs
5*
6* Created by Or Haimovich on 22/3/18.
7*/
8
9#include <sys/disk.h>
10
11#include "lf_hfs_volume_allocation.h"
12#include "lf_hfs_logger.h"
13#include "lf_hfs_endian.h"
14#include "lf_hfs_format.h"
15#include "lf_hfs_locks.h"
16#include "lf_hfs_rangelist.h"
17#include "lf_hfs_raw_read_write.h"
18#include "lf_hfs_readwrite_ops.h"
19#include "lf_hfs_utils.h"
20#include "lf_hfs_journal.h"
21#include "lf_hfs_vfsutils.h"
22#include "lf_hfs_vfsops.h"
23#include "lf_hfs_generic_buf.h"
24
25#pragma clang diagnostic ignored "-Waddress-of-packed-member"
26
27enum {
28 /*
29 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
30 * BlockDeallocate, including the internal BlockAllocateXxx
31 * routines so we can see how an allocation was satisfied.
32 *
33 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
34 * free extent cache.
35 *
36 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
37 *
38 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
39 * or clearing bits, scanning the bitmap).
40 */
41 HFSDBG_ALLOC_ENABLED = 1,
42 HFSDBG_EXT_CACHE_ENABLED = 2,
43 HFSDBG_UNMAP_ENABLED = 4,
44 HFSDBG_BITMAP_ENABLED = 8
45};
46
47enum {
48 kBytesPerWord = 4,
49 kBitsPerByte = 8,
50 kBitsPerWord = 32,
51
52 kBitsWithinWordMask = kBitsPerWord-1
53};
54
55#define kLowBitInWordMask 0x00000001ul
56#define kHighBitInWordMask 0x80000000ul
57#define kAllBitsSetInWord 0xFFFFFFFFul
58
59#define HFS_MIN_SUMMARY_BLOCKSIZE 4096
60
61#define ALLOC_DEBUG 0
62
63static OSErr ReadBitmapBlock(
64 ExtendedVCB *vcb,
65 u_int32_t bit,
66 u_int32_t **buffer,
67 GenericLFBufPtr *blockRef,
68 hfs_block_alloc_flags_t flags);
69
70static OSErr ReleaseBitmapBlock(
71 ExtendedVCB *vcb,
72 GenericLFBufPtr blockRef,
73 Boolean dirty);
74
75static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
76 HFSPlusExtentDescriptor *extent,
77 hfs_block_alloc_flags_t flags,
78 hfs_alloc_extra_args_t *ap);
79
80static OSErr BlockFindAny(
81 ExtendedVCB *vcb,
82 u_int32_t startingBlock,
83 u_int32_t endingBlock,
84 u_int32_t maxBlocks,
85 hfs_block_alloc_flags_t flags,
86 Boolean trustSummary,
87 u_int32_t *actualStartBlock,
88 u_int32_t *actualNumBlocks);
89
90static OSErr BlockFindAnyBitmap(
91 ExtendedVCB *vcb,
92 u_int32_t startingBlock,
93 u_int32_t endingBlock,
94 u_int32_t maxBlocks,
95 hfs_block_alloc_flags_t flags,
96 u_int32_t *actualStartBlock,
97 u_int32_t *actualNumBlocks);
98
99static OSErr BlockFindContig(
100 ExtendedVCB *vcb,
101 u_int32_t startingBlock,
102 u_int32_t minBlocks,
103 u_int32_t maxBlocks,
104 hfs_block_alloc_flags_t flags,
105 u_int32_t *actualStartBlock,
106 u_int32_t *actualNumBlocks);
107
108static OSErr BlockFindContiguous(
109 ExtendedVCB *vcb,
110 u_int32_t startingBlock,
111 u_int32_t endingBlock,
112 u_int32_t minBlocks,
113 u_int32_t maxBlocks,
114 Boolean useMetaZone,
115 Boolean trustSummary,
116 u_int32_t *actualStartBlock,
117 u_int32_t *actualNumBlocks,
118 hfs_block_alloc_flags_t flags);
119
120static OSErr BlockFindKnown(
121 ExtendedVCB *vcb,
122 u_int32_t maxBlocks,
123 u_int32_t *actualStartBlock,
124 u_int32_t *actualNumBlocks);
125
126static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
127 HFSPlusExtentDescriptor *extent,
128 uint32_t max_blocks,
129 hfs_block_alloc_flags_t flags);
130
131static OSErr BlockMarkAllocatedInternal (
132 ExtendedVCB *vcb,
133 u_int32_t startingBlock,
134 u_int32_t numBlocks,
135 hfs_block_alloc_flags_t flags);
136
137static OSErr BlockMarkFreeInternal(
138 ExtendedVCB *vcb,
139 u_int32_t startingBlock,
140 u_int32_t numBlocks,
141 Boolean do_validate);
142
143
144static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
145 uint32_t **buffer, GenericLFBuf **blockRef);
146
147static OSErr ReleaseScanBitmapRange( GenericLFBufPtr bp );
148
149static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
150 u_int32_t numBlocks, struct jnl_trim_list *list);
151
152static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
153 u_int32_t startbit,
154 u_int32_t *bitToScan,
155 struct jnl_trim_list *list);
156
157static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
158/* Bitmap Re-use Detection */
159static inline int extents_overlap (uint32_t start1, uint32_t len1,
160 uint32_t start2, uint32_t len2) {
161 return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) );
162}
163
164/* Summary Table Functions */
165static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
166static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
167static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
168static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
169static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
170static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
171
172/* Used in external mount code to initialize the summary table */
173int hfs_init_summary (struct hfsmount *hfsmp);
174
175#if ALLOC_DEBUG
176void hfs_validate_summary (struct hfsmount *hfsmp);
177#endif
178
179
180/* Functions for manipulating free extent cache */
181static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
182static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
183static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
184
185static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list);
186
187
188#if ALLOC_DEBUG
189/*
190 * Validation Routine to verify that the TRIM list maintained by the journal
191 * is in good shape relative to what we think the bitmap should have. We should
192 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
193 * we panic.
194 */
195int trim_validate_bitmap (struct hfsmount *hfsmp);
196int trim_validate_bitmap (struct hfsmount *hfsmp) {
197 u_int64_t blockno_offset;
198 u_int64_t numblocks;
199 int i;
200 int count;
201 u_int32_t startblk;
202 u_int32_t blks;
203 int err = 0;
204 uint32_t alloccount = 0;
205
206 if (hfsmp->jnl) {
207 struct journal *jnl = (struct journal*)hfsmp->jnl;
208 if (jnl->active_tr) {
209 struct jnl_trim_list *trim = &(jnl->active_tr->trim);
210 count = trim->extent_count;
211 for (i = 0; i < count; i++) {
212 blockno_offset = trim->extents[i].offset;
213 blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
214 blockno_offset = blockno_offset / hfsmp->blockSize;
215 numblocks = trim->extents[i].length / hfsmp->blockSize;
216
217 startblk = (u_int32_t)blockno_offset;
218 blks = (u_int32_t) numblocks;
219 err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
220
221 if (err == 0 && alloccount != 0) {
222 LFHFS_LOG(LEVEL_ERROR, "trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
223 hfs_assert(0);
224 }
225 }
226 }
227 }
228 return 0;
229}
230
231#endif
232
233/*
234 ;________________________________________________________________________________
235 ;
236 ; Routine: hfs_issue_unmap
237 ;
238 ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
239 ;
240 ; Input Arguments:
241 ; hfsmp - The volume containing the allocation blocks.
242 ; list - The list of currently tracked trim ranges.
243 ;________________________________________________________________________________
244 */
245
246static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
247{
248 dk_unmap_t unmap;
249 int error = 0;
250
251 if (list->extent_count > 0 && list->extents != NULL) {
252 bzero(&unmap, sizeof(unmap));
253 unmap.extents = list->extents;
254 unmap.extentsCount = list->extent_count;
255
256 /* Issue a TRIM and flush them out */
257 error = ioctl(hfsmp->hfs_devvp->psFSRecord->iFD, DKIOCUNMAP, &unmap);
258
259 bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
260 bzero (&unmap, sizeof(unmap));
261 list->extent_count = 0;
262 }
263
264 return error;
265}
266
267/*
268 ;________________________________________________________________________________
269 ;
270 ; Routine: hfs_track_unmap_blocks
271 ;
272 ; Function: Make note of a range of allocation blocks that should be
273 ; unmapped (trimmed). That is, the given range of blocks no
274 ; longer have useful content, and the device can unmap the
275 ; previous contents. For example, a solid state disk may reuse
276 ; the underlying storage for other blocks.
277 ;
278 ; This routine is only supported for journaled volumes.
279 ;
280 ; *****NOTE*****:
281 ; This function should *NOT* be used when the volume is fully
282 ; mounted. This function is intended to support a bitmap iteration
283 ; at mount time to fully inform the SSD driver of the state of all blocks
284 ; at mount time, and assumes that there is no allocation/deallocation
285 ; interference during its iteration.,
286 ;
287 ; Input Arguments:
288 ; hfsmp - The volume containing the allocation blocks.
289 ; offset - The first allocation block of the extent being freed.
290 ; numBlocks - The number of allocation blocks of the extent being freed.
291 ; list - The list of currently tracked trim ranges.
292 ;________________________________________________________________________________
293 */
294static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, u_int32_t numBlocks, struct jnl_trim_list *list) {
295 u_int64_t offset;
296 u_int64_t length;
297 int error = 0;
298
299 if ((hfsmp->jnl != NULL))
300 {
301 if ((hfsmp->hfs_flags & HFS_UNMAP) && list->allocated_count && list->extents != NULL)
302 {
303
304 int extent_no = list->extent_count;
305 offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
306 length = (u_int64_t) numBlocks * hfsmp->blockSize;
307
308 list->extents[extent_no].offset = offset;
309 list->extents[extent_no].length = length;
310 list->extent_count++;
311 if (list->extent_count == list->allocated_count) {
312 error = hfs_issue_unmap (hfsmp, list);
313 }
314 }
315 }
316
317 return error;
318}
319
320/*
321 ;________________________________________________________________________________
322 ;
323 ; Routine: hfs_trim_callback
324 ;
325 ; Function: This function is called when a transaction that freed extents
326 ; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
327 ; written to the on-disk journal. This routine will add those
328 ; extents to the free extent cache so that they can be reused.
329 ;
330 ; CAUTION: This routine is called while the journal's trim lock
331 ; is held shared, so that no other thread can reuse any portion
332 ; of those extents. We must be very careful about which locks
333 ; we take from within this callback, to avoid deadlock. The
334 ; call to add_free_extent_cache will end up taking the cache's
335 ; lock (just long enough to add these extents to the cache).
336 ;
337 ; CAUTION: If the journal becomes invalid (eg., due to an I/O
338 ; error when trying to write to the journal), this callback
339 ; will stop getting called, even if extents got freed before
340 ; the journal became invalid!
341 ;
342 ; Input Arguments:
343 ; arg - The hfsmount of the volume containing the extents.
344 ; extent_count - The number of extents freed in the transaction.
345 ; extents - An array of extents (byte ranges) that were freed.
346 ;________________________________________________________________________________
347 */
348
349void
350hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
351{
352 uint32_t i;
353 uint32_t startBlock, numBlocks;
354 struct hfsmount *hfsmp = arg;
355
356 for (i=0; i<extent_count; ++i) {
357 /* Convert the byte range in *extents back to a range of allocation blocks. */
358 startBlock = (uint32_t)((extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize);
359 numBlocks = (uint32_t)(extents[i].length / hfsmp->blockSize);
360 (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
361 }
362}
363
364
365/*
366 ;________________________________________________________________________________
367 ;
368 ; Routine: ScanUnmapBlocks
369 ;
370 ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
371 ; device as needed so that the underlying disk device is as
372 ; up-to-date as possible with which blocks are unmapped.
373 ; Additionally build up the summary table as needed.
374 ;
375 ; This function reads the bitmap in large block size
376 ; (up to 1MB) unlike the runtime which reads the bitmap
377 ; in 4K block size. So if this function is being called
378 ; after the volume is mounted and actively modified, the
379 ; caller needs to invalidate all of the existing buffers
380 ; associated with the bitmap vnode before calling this
381 ; function. If the buffers are not invalidated, it can
382 ; cause buf_t collision and potential data corruption.
383 ;
384 ; Input Arguments:
385 ; hfsmp - The volume containing the allocation blocks.
386 ;________________________________________________________________________________
387 */
388
389u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
390{
391 u_int32_t blocks_scanned = 0;
392 int error = 0;
393 struct jnl_trim_list trimlist;
394
395 /*
396 *struct jnl_trim_list {
397 uint32_t allocated_count;
398 uint32_t extent_count;
399 dk_extent_t *extents;
400 };
401 */
402 bzero (&trimlist, sizeof(trimlist));
403
404 /*
405 * Any trim related work should be tied to whether the underlying
406 * storage media supports UNMAP, as any solid state device would
407 * on desktop or embedded.
408 *
409 * We do this because we may want to scan the full bitmap on
410 * desktop for spinning media for the purposes of building up the
411 * summary table.
412 *
413 * We also avoid sending TRIMs down to the underlying media if the
414 * mount is read-only.
415 */
416
417 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
418 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
419 /* If the underlying device supports unmap and the mount is read-write, initialize */
420 int alloc_count = ((u_int32_t)PAGE_SIZE) / sizeof(dk_extent_t);
421 void *extents = hfs_malloc(alloc_count * sizeof(dk_extent_t));
422 trimlist.extents = (dk_extent_t*)extents;
423 trimlist.allocated_count = alloc_count;
424 trimlist.extent_count = 0;
425 }
426
427 while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
428
429 error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
430
431 if (error) {
432 LFHFS_LOG(LEVEL_DEBUG, "ScanUnmapBlocks: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
433 break;
434 }
435 }
436
437 if ((hfsmp->hfs_flags & HFS_UNMAP) && ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
438 if (error == 0) {
439 hfs_issue_unmap(hfsmp, &trimlist);
440 }
441 if (trimlist.extents) {
442 hfs_free(trimlist.extents);
443 }
444 }
445
446 /*
447 * This is in an #if block because hfs_validate_summary prototype and function body
448 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
449 */
450#if ALLOC_DEBUG
451 sanity_check_free_ext(hfsmp, 1);
452 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
453 /* Validate the summary table too! */
454 hfs_validate_summary(hfsmp);
455 LFHFS_LOG(LEVEL_DEBUG, "ScanUnmapBlocks: Summary validation complete on %s\n", hfsmp->vcbVN);
456 }
457#endif
458
459 return error;
460}
461
462static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start,
463 uint32_t count, int list,
464 struct rl_entry **reservation)
465{
466 struct rl_entry *range, *next_range;
467
468 if (list == HFS_TENTATIVE_BLOCKS) {
469 int nranges = 0;
470 // Don't allow more than 4 tentative reservations
471 TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS],
472 rl_link, next_range) {
473 if (++nranges > 3)
474 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
475 }
476 }
477
478 range = hfs_malloc(sizeof(*range));
479 range->rl_start = start;
480 range->rl_end = start + count - 1;
481 TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
482 *reservation = range;
483}
484
485static void hfs_release_reserved(hfsmount_t *hfsmp,
486 struct rl_entry *range,
487 int list)
488{
489 if (range->rl_start == -1)
490 return;
491
492 TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link);
493
494 if (rl_len(range) > 0) {
495 if (list == HFS_TENTATIVE_BLOCKS)
496 hfsmp->tentativeBlocks -= rl_len(range);
497 else {
498 hfs_assert(hfsmp->lockedBlocks >= rl_len(range));
499 hfsmp->lockedBlocks -= rl_len(range);
500 }
501 hfs_release_summary(hfsmp, (uint32_t)range->rl_start, (uint32_t)rl_len(range));
502 add_free_extent_cache(hfsmp, (uint32_t)range->rl_start, (uint32_t)rl_len(range));
503 }
504
505 range->rl_start = -1;
506 range->rl_end = -2;
507}
508
509static void hfs_free_locked_internal(hfsmount_t *hfsmp,
510 struct rl_entry **reservation,
511 int list)
512{
513 if (*reservation) {
514 hfs_release_reserved(hfsmp, *reservation, list);
515 hfs_free(*reservation);
516 *reservation = NULL;
517 }
518}
519
520void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation)
521{
522 hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS);
523}
524
525void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation)
526{
527 hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS);
528}
529
530OSErr BlockAllocate (
531 hfsmount_t *hfsmp, /* which volume to allocate space on */
532 u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
533 u_int32_t minBlocks, /* desired number of blocks to allocate */
534 u_int32_t maxBlocks, /* maximum number of blocks to allocate */
535 hfs_block_alloc_flags_t flags, /* option flags */
536 u_int32_t *actualStartBlock, /* actual first block of allocation */
537 u_int32_t *actualNumBlocks)
538{
539 hfs_alloc_extra_args_t extra_args = {
540 .max_blocks = maxBlocks
541 };
542
543 HFSPlusExtentDescriptor extent = { startingBlock, minBlocks };
544
545 OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args);
546
547 *actualStartBlock = extent.startBlock;
548 *actualNumBlocks = extent.blockCount;
549
550 return err;
551}
552
553errno_t hfs_block_alloc(hfsmount_t *hfsmp,
554 HFSPlusExtentDescriptor *extent,
555 hfs_block_alloc_flags_t flags,
556 hfs_alloc_extra_args_t *ap)
557{
558 return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap));
559}
560
561/*
562 ;________________________________________________________________________________
563 ;
564 ; Routine: hfs_block_alloc_int
565 ;
566 ; Function: Allocate space on a volume. If contiguous allocation is requested,
567 ; at least the requested number of bytes will be allocated or an
568 ; error will be returned. If contiguous allocation is not forced,
569 ; the space will be allocated with the first largest extent available
570 ; at the requested starting allocation block. If there is not enough
571 ; room there, a block allocation of less than the requested size will be
572 ; allocated.
573 ;
574 ; If the requested starting block is 0 (for new file allocations),
575 ; the volume's allocation block pointer will be used as a starting
576 ; point.
577 ;
578 ; Input Arguments:
579 ; hfsmp - Pointer to the HFS mount structure.
580 ; extent - startBlock indicates the block to start
581 ; searching from and blockCount is the number of
582 ; blocks required. Depending on the flags used,
583 ; more or less blocks may be returned. The
584 ; allocated extent is returned via this
585 ; parameter.
586 ; flags - Flags to specify options like contiguous, use
587 ; metadata zone, skip free block check, etc.
588 ; ap - Additional arguments used depending on flags.
589 ; See hfs_alloc_extra_args_t and below.
590 ;
591 ; Output:
592 ; (result) - Error code, zero for successful allocation
593 ; extent - If successful, the allocated extent.
594 ;
595 ; Side effects:
596 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
597 ;
598 ; HFS_ALLOC_TENTATIVE
599 ; Blocks will be reserved but not marked allocated. They can be
600 ; stolen if free space is limited. Tentative blocks can be used by
601 ; passing HFS_ALLOC_USE_TENTATIVE and passing in the resevation.
602 ; @ap->reservation_out is used to store the reservation.
603 ;
604 ; HFS_ALLOC_USE_TENTATIVE
605 ; Use blocks previously returned with HFS_ALLOC_TENTATIVE.
606 ; @ap->reservation_in should be set to whatever @ap->reservation_out
607 ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative
608 ; reservation was stolen, a normal allocation will take place.
609 ;
610 ; HFS_ALLOC_LOCKED
611 ; Blocks will be reserved but not marked allocated. Unlike tentative
612 ; reservations they cannot be stolen. It is safe to write to these
613 ; blocks. @ap->reservation_out is used to store the reservation.
614 ;
615 ; HFS_ALLOC_COMMIT
616 ; This will take blocks previously returned with HFS_ALLOC_LOCKED and
617 ; mark them allocated on disk. @ap->reservation_in is used.
618 ;
619 ; HFS_ALLOC_ROLL_BACK
620 ; Take blocks that were just recently deallocated and mark them
621 ; allocated. This is for roll back situations. Blocks got
622 ; deallocated and then something went wrong and we need to roll back
623 ; by marking the blocks allocated.
624 ;
625 ; HFS_ALLOC_FORCECONTIG
626 ; It will not return fewer than @min_blocks.
627 ;
628 ; HFS_ALLOC_TRY_HARD
629 ; We will perform an exhaustive search to try and find @max_blocks.
630 ; It will not return fewer than @min_blocks.
631 ;
632 ;________________________________________________________________________________
633 */
634OSErr hfs_block_alloc_int(hfsmount_t *hfsmp,
635 HFSPlusExtentDescriptor *extent,
636 hfs_block_alloc_flags_t flags,
637 hfs_alloc_extra_args_t *ap)
638{
639 OSErr err = 0;
640 u_int32_t freeBlocks;
641 Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
642 Boolean forceContiguous = false;
643 Boolean forceFlush;
644
645 uint32_t startingBlock = extent->startBlock;
646 uint32_t minBlocks = extent->blockCount;
647 uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks;
648
649 if (ISSET(flags, HFS_ALLOC_COMMIT)) {
650 if (ap == NULL || ap->reservation_in == NULL) {
651 err = paramErr;
652 goto exit;
653 }
654 extent->startBlock = (uint32_t)(*ap->reservation_in)->rl_start;
655 extent->blockCount = (uint32_t)rl_len(*ap->reservation_in);
656 goto mark_allocated;
657 }
658
659 if (ISSET(flags, HFS_ALLOC_ROLL_BACK))
660 goto mark_allocated;
661
662 freeBlocks = hfs_freeblks(hfsmp, 0);
663
664 if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
665 if (ap == NULL || ap->reservation_in == NULL) {
666 err = paramErr;
667 goto exit;
668 }
669 struct rl_entry *range = *ap->reservation_in;
670
671 if (range && range->rl_start != -1) {
672 /*
673 * It's possible that we have a tentative reservation
674 * but there aren't enough free blocks due to loaned blocks
675 * or insufficient space in the backing store.
676 */
677 uint32_t count = (uint32_t)min(min(maxBlocks, rl_len(range)), freeBlocks);
678
679 if (count >= minBlocks) {
680 extent->startBlock = (uint32_t)range->rl_start;
681 extent->blockCount = count;
682
683 // Should we go straight to commit?
684 if (!ISSET(flags, HFS_ALLOC_LOCKED))
685 SET(flags, HFS_ALLOC_COMMIT);
686
687 goto mark_allocated;
688 }
689 }
690
691 /*
692 * We can't use the tentative reservation so free it and allocate
693 * normally.
694 */
695 hfs_free_tentative(hfsmp, ap->reservation_in);
696 CLR(flags, HFS_ALLOC_USE_TENTATIVE);
697 }
698
699 if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD))
700 forceContiguous = true;
701
702 if (flags & HFS_ALLOC_FLUSHTXN) {
703 forceFlush = true;
704 }
705 else {
706 forceFlush = false;
707 }
708
709 hfs_assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks);
710
711 // See if we have to steal tentative blocks
712 if (freeBlocks < hfsmp->tentativeBlocks + minBlocks)
713 SET(flags, HFS_ALLOC_IGNORE_TENTATIVE);
714
715 /* Skip free block check if blocks are being allocated for relocating
716 * data during truncating a volume.
717 *
718 * During hfs_truncatefs(), the volume free block count is updated
719 * before relocating data to reflect the total number of free blocks
720 * that will exist on the volume after resize is successful. This
721 * means that we have reserved allocation blocks required for relocating
722 * the data and hence there is no need to check the free blocks.
723 * It will also prevent resize failure when the number of blocks in
724 * an extent being relocated is more than the free blocks that will
725 * exist after the volume is resized.
726 */
727 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
728 // If the disk is already full, don't bother.
729 if (freeBlocks == 0) {
730 err = dskFulErr;
731 goto exit;
732 }
733 if (forceContiguous && freeBlocks < minBlocks) {
734 err = dskFulErr;
735 goto exit;
736 }
737
738 /*
739 * Clip if necessary so we don't over-subscribe the free blocks.
740 */
741 if (minBlocks > freeBlocks) {
742 minBlocks = freeBlocks;
743 }
744 if (maxBlocks > freeBlocks) {
745 maxBlocks = freeBlocks;
746 }
747 }
748
749 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
750 err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags);
751 if (err)
752 goto exit;
753
754 goto mark_allocated;
755 }
756
757 //
758 // If caller didn't specify a starting block number, then use the volume's
759 // next block to allocate from.
760 //
761 if (startingBlock == 0) {
762 hfs_lock_mount (hfsmp);
763 startingBlock = hfsmp->nextAllocation;
764 hfs_unlock_mount(hfsmp);
765 updateAllocPtr = true;
766 }
767
768 if (startingBlock >= hfsmp->allocLimit) {
769 startingBlock = 0; /* overflow so start at beginning */
770 }
771
772 //
773 // If the request must be contiguous, then find a sequence of free blocks
774 // that is long enough. Otherwise, find the first free block.
775 //
776 if (forceContiguous) {
777 err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks,
778 flags, &extent->startBlock, &extent->blockCount);
779 /*
780 * If we allocated from a new position then also update the roving allocator.
781 * This will keep the roving allocation pointer up-to-date even
782 * if we are using the new R/B tree allocator, since
783 * it doesn't matter to us here, how the underlying allocator found
784 * the block to vend out.
785 */
786 if ((err == noErr) &&
787 (extent->startBlock > startingBlock) &&
788 ((extent->startBlock < hfsmp->hfs_metazone_start) ||
789 (extent->startBlock > hfsmp->hfs_metazone_end))) {
790 updateAllocPtr = true;
791 }
792 } else {
793 /*
794 * Scan the bitmap once, gather the N largest free extents, then
795 * allocate from these largest extents. Repeat as needed until
796 * we get all the space we needed. We could probably build up
797 * that list when the higher level caller tried (and failed) a
798 * contiguous allocation first.
799 *
800 * Note that the free-extent cache will be cease to be updated if
801 * we are using the red-black tree for allocations. If we jettison
802 * the tree, then we will reset the free-extent cache and start over.
803 */
804
805 /* Disable HFS_ALLOC_FLUSHTXN if needed */
806 if (forceFlush) {
807 flags &= ~HFS_ALLOC_FLUSHTXN;
808 }
809
810 /*
811 * BlockFindKnown only examines the free extent cache; anything in there will
812 * have been committed to stable storage already.
813 */
814 err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock,
815 &extent->blockCount);
816
817 /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */
818
819 if (err == dskFulErr) {
820 /*
821 * Now we have to do a bigger scan. Start at startingBlock and go up until the
822 * allocation limit. We 'trust' the summary bitmap in this call, if it tells us
823 * that it could not find any free space.
824 */
825 err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit,
826 maxBlocks, flags, true,
827 &extent->startBlock, &extent->blockCount);
828 }
829 if (err == dskFulErr) {
830 /*
831 * Vary the behavior here if the summary table is on or off.
832 * If it is on, then we don't trust it it if we get into this case and
833 * basically do a full scan for maximum coverage.
834 * If it is off, then we trust the above and go up until the startingBlock.
835 */
836 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
837 err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
838 flags, false,
839 &extent->startBlock, &extent->blockCount);
840 }
841 else {
842 err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks,
843 flags, false,
844 &extent->startBlock, &extent->blockCount);
845 }
846
847 /*
848 * Last Resort: Find/use blocks that may require a journal flush.
849 */
850 if (err == dskFulErr && forceFlush) {
851 flags |= HFS_ALLOC_FLUSHTXN;
852 err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks,
853 flags, false,
854 &extent->startBlock, &extent->blockCount);
855 }
856 }
857 }
858
859 if (err)
860 goto exit;
861
862mark_allocated:
863
864 // Handle alignment
865 if (ap && ap->alignment && extent->blockCount < ap->max_blocks) {
866 /*
867 * See the comment in FileMgrInternal.h for alignment
868 * semantics.
869 */
870 uint32_t rounding = ((extent->blockCount + ap->alignment_offset)
871 % ap->alignment);
872
873 // @minBlocks is still the minimum
874 if (extent->blockCount >= minBlocks + rounding)
875 extent->blockCount -= rounding;
876 }
877
878 err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock,
879 extent->blockCount, flags);
880
881 if (err)
882 goto exit;
883
884 // if we actually allocated something then go update the
885 // various bits of state that we maintain regardless of
886 // whether there was an error (i.e. partial allocations
887 // still need to update things like the free block count).
888 //
889 if (extent->blockCount != 0) {
890 //
891 // If we used the volume's roving allocation pointer, then we need to update it.
892 // Adding in the length of the current allocation might reduce the next allocate
893 // call by avoiding a re-scan of the already allocated space. However, the clump
894 // just allocated can quite conceivably end up being truncated or released when
895 // the file is closed or its EOF changed. Leaving the allocation pointer at the
896 // start of the last allocation will avoid unnecessary fragmentation in this case.
897 //
898 hfs_lock_mount (hfsmp);
899
900 if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) {
901 lf_lck_spin_lock(&hfsmp->vcbFreeExtLock);
902 if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) {
903 hfsmp->sparseAllocation = extent->startBlock;
904 }
905 lf_lck_spin_unlock(&hfsmp->vcbFreeExtLock);
906 if (extent->blockCount < hfsmp->hfs_freed_block_count) {
907 hfsmp->hfs_freed_block_count -= extent->blockCount;
908 } else {
909 hfsmp->hfs_freed_block_count = 0;
910 }
911
912 if (updateAllocPtr &&
913 ((extent->startBlock < hfsmp->hfs_metazone_start) ||
914 (extent->startBlock > hfsmp->hfs_metazone_end))) {
915 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock);
916 }
917
918 (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount);
919 }
920
921 if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
922 if (ap == NULL || ap->reservation_in == NULL) {
923 err = paramErr;
924 goto exit;
925 }
926 (*ap->reservation_in)->rl_start += extent->blockCount;
927 hfsmp->tentativeBlocks -= extent->blockCount;
928 if (rl_len(*ap->reservation_in) <= 0)
929 hfs_free_tentative(hfsmp, ap->reservation_in);
930 } else if (ISSET(flags, HFS_ALLOC_COMMIT)) {
931 // Handle committing locked extents
932 hfs_assert(hfsmp->lockedBlocks >= extent->blockCount);
933 (*ap->reservation_in)->rl_start += extent->blockCount;
934 hfsmp->lockedBlocks -= extent->blockCount;
935 hfs_free_locked(hfsmp, ap->reservation_in);
936 }
937
938 /*
939 * Update the number of free blocks on the volume
940 *
941 * Skip updating the free blocks count if the block are
942 * being allocated to relocate data as part of hfs_truncatefs()
943 */
944
945 if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
946 hfsmp->tentativeBlocks += extent->blockCount;
947 } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
948 hfsmp->lockedBlocks += extent->blockCount;
949 } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
950 hfsmp->freeBlocks -= extent->blockCount;
951 }
952 MarkVCBDirty(hfsmp);
953 hfs_unlock_mount(hfsmp);
954
955 if (ISSET(flags, HFS_ALLOC_TENTATIVE)) {
956 hfs_assert(ap);
957 add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
958 0, ap->reservation_out);
959 } else if (ISSET(flags, HFS_ALLOC_LOCKED)) {
960 hfs_assert(ap);
961 add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount,
962 1, ap->reservation_out);
963 }
964
965 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
966 /*
967 * See if we used tentative blocks. Note that we cannot
968 * free the reservations here because we don't have access
969 * to the external pointers. All we can do is update the
970 * reservations and they'll be cleaned up when whatever is
971 * holding the pointers calls us back.
972 *
973 * We use the rangelist code to detect overlaps and
974 * constrain the tentative block allocation. Note that
975 * @end is inclusive so that our rangelist code will
976 * resolve the various cases for us. As a result, we need
977 * to ensure that we account for it properly when removing
978 * the blocks from the tentative count in the mount point
979 * and re-inserting the remainder (either head or tail)
980 */
981 struct rl_entry *range, *next_range;
982 struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS];
983 const uint32_t start = extent->startBlock;
984 const uint32_t end = start + extent->blockCount - 1;
985 TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) {
986 switch (rl_overlap(range, start, end)) {
987 case RL_OVERLAPCONTAINSRANGE:
988 // Keep the bigger part
989 if (start - range->rl_start > range->rl_end - end) {
990 // Discard the tail
991 hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
992 hfs_release_summary(hfsmp, end + 1, (uint32_t)(range->rl_end - end));
993 const uint32_t old_end = (uint32_t)range->rl_end;
994 range->rl_end = start - 1;
995 add_free_extent_cache(hfsmp, end + 1, old_end - end);
996 } else {
997 // Discard the head
998 hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
999 hfs_release_summary(hfsmp, (uint32_t)range->rl_start, (uint32_t)(start - range->rl_start));
1000 const uint32_t old_start = (uint32_t)range->rl_start;
1001 range->rl_start = end + 1;
1002 add_free_extent_cache(hfsmp, old_start,
1003 start - old_start);
1004 }
1005 hfs_assert(range->rl_end >= range->rl_start);
1006 break;
1007 case RL_MATCHINGOVERLAP:
1008 case RL_OVERLAPISCONTAINED:
1009 hfsmp->tentativeBlocks -= rl_len(range);
1010 range->rl_end = range->rl_start - 1;
1011 hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS);
1012 break;
1013 case RL_OVERLAPSTARTSBEFORE:
1014 hfsmp->tentativeBlocks -= range->rl_end + 1 - start;
1015 range->rl_end = start - 1;
1016 hfs_assert(range->rl_end >= range->rl_start);
1017 break;
1018 case RL_OVERLAPENDSAFTER:
1019 hfsmp->tentativeBlocks -= end + 1 - range->rl_start;
1020 range->rl_start = end + 1;
1021 hfs_assert(range->rl_end >= range->rl_start);
1022 break;
1023 case RL_NOOVERLAP:
1024 break;
1025 }
1026 }
1027 }
1028 }
1029
1030exit:
1031
1032 if (ALLOC_DEBUG) {
1033 if (err == noErr) {
1034 if (extent->startBlock >= hfsmp->totalBlocks) {
1035 LFHFS_LOG(LEVEL_ERROR, "BlockAllocate: vending invalid blocks!");
1036 hfs_assert(0);
1037 }
1038 if (extent->startBlock >= hfsmp->allocLimit) {
1039 LFHFS_LOG(LEVEL_ERROR, "BlockAllocate: vending block past allocLimit!");
1040 hfs_assert(0);
1041 }
1042
1043 if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) {
1044 LFHFS_LOG(LEVEL_ERROR, "BlockAllocate: vending too many invalid blocks!");
1045 hfs_assert(0);
1046 }
1047
1048 if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) {
1049 LFHFS_LOG(LEVEL_ERROR, "BlockAllocate: vending too many invalid blocks past allocLimit!");
1050 hfs_assert(0);
1051 }
1052 }
1053 }
1054
1055 if (err) {
1056 // Just to be safe...
1057 extent->startBlock = 0;
1058 extent->blockCount = 0;
1059 }
1060
1061 // KBZ : For now, make sure clusters fills with zeros.
1062 raw_readwrite_zero_fill_fill( hfsmp, extent->startBlock, extent->blockCount );
1063
1064 return err;
1065}
1066
1067
1068/*
1069 ;________________________________________________________________________________
1070 ;
1071 ; Routine: BlockDeallocate
1072 ;
1073 ; Function: Update the bitmap to deallocate a run of disk allocation blocks
1074 ;
1075 ; Input Arguments:
1076 ; vcb - Pointer to ExtendedVCB for the volume to free space on
1077 ; firstBlock - First allocation block to be freed
1078 ; numBlocks - Number of allocation blocks to free up (must be > 0!)
1079 ;
1080 ; Output:
1081 ; (result) - Result code
1082 ;
1083 ; Side effects:
1084 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
1085 ; The Allocator's red-black trees may also be modified as a result.
1086 ;
1087 ;________________________________________________________________________________
1088 */
1089
1090OSErr BlockDeallocate (
1091 ExtendedVCB *vcb, // Which volume to deallocate space on
1092 u_int32_t firstBlock, // First block in range to deallocate
1093 u_int32_t numBlocks, // Number of contiguous blocks to deallocate
1094 hfs_block_alloc_flags_t flags)
1095{
1096 if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED))
1097 return 0;
1098
1099 OSErr err;
1100 struct hfsmount *hfsmp;
1101 hfsmp = VCBTOHFS(vcb);
1102
1103 //
1104 // If no blocks to deallocate, then exit early
1105 //
1106 if (numBlocks == 0) {
1107 err = noErr;
1108 goto Exit;
1109 }
1110
1111
1112 if (ALLOC_DEBUG) {
1113 if (firstBlock >= hfsmp->totalBlocks) {
1114 LFHFS_LOG(LEVEL_ERROR, "BlockDeallocate: freeing invalid blocks!");
1115 hfs_assert(0);
1116 }
1117
1118 if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
1119 LFHFS_LOG(LEVEL_ERROR, "BlockDeallocate: freeing too many invalid blocks!");
1120 hfs_assert(0);
1121 }
1122 }
1123
1124 /*
1125 * If we're using the summary bitmap, then try to mark the bits
1126 * as potentially usable/free before actually deallocating them.
1127 * It is better to be slightly speculative here for correctness.
1128 */
1129
1130 (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
1131
1132 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1133
1134 if (err) {
1135 goto Exit;
1136 }
1137
1138 //
1139 // Update the volume's free block count, and mark the VCB as dirty.
1140 //
1141 hfs_lock_mount(hfsmp);
1142 /*
1143 * Do not update the free block count. This flags is specified
1144 * when a volume is being truncated.
1145 */
1146 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1147 vcb->freeBlocks += numBlocks;
1148 }
1149
1150 vcb->hfs_freed_block_count += numBlocks;
1151
1152 if (vcb->nextAllocation == (firstBlock + numBlocks)) {
1153 HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
1154 }
1155
1156 if (hfsmp->jnl == NULL)
1157 {
1158 /*
1159 * In the journal case, we'll add the free extent once the journal
1160 * calls us back to tell us it wrote the transaction to disk.
1161 */
1162 (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
1163
1164 /*
1165 * If the journal case, we'll only update sparseAllocation once the
1166 * free extent cache becomes empty (when we remove the last entry
1167 * from the cache). Skipping it here means we're less likely to
1168 * find a recently freed extent via the bitmap before it gets added
1169 * to the free extent cache.
1170 */
1171 if (firstBlock < vcb->sparseAllocation) {
1172 vcb->sparseAllocation = firstBlock;
1173 }
1174 }
1175
1176 MarkVCBDirty(vcb);
1177 hfs_unlock_mount(hfsmp);
1178
1179Exit:
1180
1181 return err;
1182}
1183
1184
1185u_int8_t freebitcount[16] = {
1186 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
1187 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
1188};
1189
1190u_int32_t
1191MetaZoneFreeBlocks(ExtendedVCB *vcb)
1192{
1193 u_int32_t freeblocks;
1194 u_int32_t *currCache;
1195 GenericLFBufPtr blockRef;
1196 u_int32_t bit;
1197 u_int32_t lastbit;
1198 int bytesleft;
1199 int bytesperblock;
1200 u_int8_t byte;
1201 u_int8_t *buffer;
1202
1203 blockRef = 0;
1204 bytesleft = freeblocks = 0;
1205 buffer = NULL;
1206 bit = VCBTOHFS(vcb)->hfs_metazone_start;
1207 if (bit == 1)
1208 bit = 0;
1209
1210 lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1211 bytesperblock = vcb->vcbVBMIOSize;
1212
1213 /*
1214 * Count all the bits from bit to lastbit.
1215 */
1216 while (bit < lastbit) {
1217 /*
1218 * Get next bitmap block.
1219 */
1220 if (bytesleft == 0) {
1221 if (blockRef) {
1222 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1223 blockRef = 0;
1224 }
1225 if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef,
1226 HFS_ALLOC_IGNORE_TENTATIVE) != 0) {
1227 return (0);
1228 }
1229 buffer = (u_int8_t *)currCache;
1230 bytesleft = bytesperblock;
1231 }
1232 byte = *buffer++;
1233 freeblocks += freebitcount[byte & 0x0F];
1234 freeblocks += freebitcount[(byte >> 4) & 0x0F];
1235 bit += kBitsPerByte;
1236 --bytesleft;
1237 }
1238 if (blockRef)
1239 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1240
1241 return (freeblocks);
1242}
1243
1244
1245/*
1246 * Obtain the next allocation block (bit) that's
1247 * outside the metadata allocation zone.
1248 */
1249static u_int32_t NextBitmapBlock(
1250 ExtendedVCB *vcb,
1251 u_int32_t bit)
1252{
1253 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1254
1255 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1256 return (bit);
1257 /*
1258 * Skip over metadata allocation zone.
1259 */
1260 if ((bit >= hfsmp->hfs_metazone_start) &&
1261 (bit <= hfsmp->hfs_metazone_end)) {
1262 bit = hfsmp->hfs_metazone_end + 1;
1263 }
1264 return (bit);
1265}
1266
1267
1268// Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes.
1269static void bits_set(void *bitmap, int start, int end)
1270{
1271 const int start_bit = start & 63;
1272 const int end_bit = end & 63;
1273
1274#define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit))
1275#define RIGHT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull >> bit)
1276
1277 uint64_t *p = (uint64_t *)bitmap + start / 64;
1278
1279 if ((start & ~63) == (end & ~63)) {
1280 // Start and end in same 64 bits
1281 *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit);
1282 } else {
1283 *p++ |= RIGHT_MASK(start_bit);
1284
1285 int nquads = (end - end_bit - start - 1) / 64;
1286
1287 while (nquads--)
1288 *p++ = 0xffffffffffffffffull;
1289
1290 if (end_bit)
1291 *p |= LEFT_MASK(end_bit);
1292 }
1293}
1294
1295// Modifies the buffer and applies any reservations that we might have
1296static GenericLFBufPtr process_reservations(hfsmount_t *hfsmp, GenericLFBufPtr bp, off_t offset, hfs_block_alloc_flags_t flags, bool always_copy)
1297{
1298
1299#if 0
1300 bool taken_copy = false;
1301#else
1302#pragma unused (always_copy)
1303#endif
1304
1305 void *buffer = bp->pvData;
1306 const uint64_t nbytes = bp->uValidBytes;
1307 const off_t end = offset + nbytes * 8 - 1;
1308
1309 for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
1310 ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) {
1311 struct rl_entry *entry;
1312 TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) {
1313 uint32_t a, b;
1314
1315 enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end);
1316
1317 if (overlap_type == RL_NOOVERLAP)
1318 continue;
1319
1320#if 0
1321 /*
1322 * If always_copy is false, we only take a copy if B_LOCKED is
1323 * set because ReleaseScanBitmapRange doesn't invalidate the
1324 * buffer in that case.
1325 */
1326 if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) {
1327 buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL);
1328 buf_brelse(bp);
1329 bp = new_bp;
1330 buf_setflags(bp, B_NOCACHE);
1331 buffer = (void *)buf_dataptr(bp);
1332 taken_copy = true;
1333 }
1334#endif
1335 switch (overlap_type) {
1336 case RL_OVERLAPCONTAINSRANGE:
1337 case RL_MATCHINGOVERLAP:
1338 memset(buffer, 0xff, nbytes);
1339 return bp;
1340 case RL_OVERLAPISCONTAINED:
1341 a = (uint32_t)entry->rl_start;
1342 b = (uint32_t)entry->rl_end;
1343 break;
1344 case RL_OVERLAPSTARTSBEFORE:
1345 a = (uint32_t)offset;
1346 b = (uint32_t)entry->rl_end;
1347 break;
1348 case RL_OVERLAPENDSAFTER:
1349 a = (uint32_t)entry->rl_start;
1350 b = (uint32_t)end;
1351 break;
1352 case RL_NOOVERLAP:
1353 __builtin_unreachable();
1354 }
1355
1356 a -= offset;
1357 b -= offset;
1358
1359 hfs_assert(b >= a);
1360
1361 // b is inclusive
1362 bits_set(buffer, a, b + 1);
1363 }
1364 } // for (;;)
1365
1366 return bp;
1367}
1368
1369/*
1370 ;_______________________________________________________________________
1371 ;
1372 ; Routine: ReadBitmapBlock
1373 ;
1374 ; Function: Read in a bitmap block corresponding to a given allocation
1375 ; block (bit). Return a pointer to the bitmap block.
1376 ;
1377 ; Inputs:
1378 ; vcb -- Pointer to ExtendedVCB
1379 ; bit -- Allocation block whose bitmap block is desired
1380 ;
1381 ; Outputs:
1382 ; buffer -- Pointer to bitmap block corresonding to "block"
1383 ; blockRef
1384 ;_______________________________________________________________________
1385 */
1386static OSErr ReadBitmapBlock(ExtendedVCB *vcb,
1387 u_int32_t bit,
1388 u_int32_t **buffer,
1389 GenericLFBufPtr *blockRef,
1390 hfs_block_alloc_flags_t flags)
1391{
1392 OSErr err = 0;
1393 GenericLFBufPtr bp = NULL;
1394 struct vnode *vp = NULL;
1395 daddr64_t block;
1396 u_int32_t blockSize;
1397
1398 /*
1399 * volume bitmap blocks are protected by the allocation file lock
1400 */
1401 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1402
1403 blockSize = (u_int32_t)vcb->vcbVBMIOSize;
1404 if (blockSize == 0) return EINVAL; //Devision protection
1405 block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
1406
1407 /* HFS+ / HFSX */
1408 vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
1409
1410 bp = lf_hfs_generic_buf_allocate(vp, block, blockSize, 0);
1411 err = lf_hfs_generic_buf_read(bp);
1412
1413 if ( err )
1414 {
1415 lf_hfs_generic_buf_release(bp);
1416 *blockRef = NULL;
1417 *buffer = NULL;
1418 }
1419 else
1420 {
1421 if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) {
1422 bp = process_reservations(vcb, bp, block * blockSize * 8, flags, /* always_copy: */ true);
1423 }
1424
1425 bp->uFlags = flags;
1426
1427 *blockRef = bp;
1428 *buffer = bp->pvData;
1429 }
1430
1431 return err;
1432}
1433
1434
1435/*
1436 ;_______________________________________________________________________
1437 ;
1438 ; Routine: ReadBitmapRange
1439 ;
1440 ; Function: Read in a range of the bitmap starting at the given offset.
1441 ; Use the supplied size to determine the amount of I/O to generate
1442 ; against the bitmap file. Return a pointer to the bitmap block.
1443 ;
1444 ; Inputs:
1445 ; hfsmp -- Pointer to hfs mount
1446 ; offset -- byte offset into the bitmap file
1447 ; size -- How much I/O to generate against the bitmap file.
1448 ;
1449 ; Outputs:
1450 ; buffer -- Pointer to bitmap block data corresonding to "block"
1451 ; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call.
1452 ;_______________________________________________________________________
1453 */
1454static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize, uint32_t **buffer, GenericLFBuf **blockRef)
1455{
1456
1457 OSErr err = 0;
1458 GenericLFBufPtr bp = NULL;
1459 struct vnode *vp = NULL;
1460 daddr64_t block;
1461
1462 /*
1463 * volume bitmap blocks are protected by the allocation file lock
1464 */
1465 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
1466
1467 vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */
1468
1469 /*
1470 * The byte offset argument must be converted into bitmap-relative logical
1471 * block numbers before using it in buf_meta_bread.
1472 *
1473 * lf_hfs_generic_buf_read (and the things it calls) will eventually try to
1474 * reconstruct the byte offset into the file by multiplying the logical
1475 * block number passed in below by the given iosize.
1476 * So we prepare for that by converting the byte offset back into
1477 * logical blocks in terms of iosize units.
1478 *
1479 * The amount of I/O requested and the byte offset should be computed
1480 * based on the helper function in the frame that called us, so we can
1481 * get away with just doing a simple divide here.
1482 */
1483 block = (daddr64_t)(offset / iosize);
1484
1485 bp = lf_hfs_generic_buf_allocate(vp, block, iosize, 0);
1486 err = lf_hfs_generic_buf_read(bp);
1487
1488 if ( err )
1489 {
1490 lf_hfs_generic_buf_release(bp);
1491 *blockRef = 0;
1492 *buffer = NULL;
1493 }
1494 else
1495 {
1496 bp = process_reservations(hfsmp, bp, (offset * 8), 0, /* always_copy: */ false);
1497 *blockRef = bp;
1498 *buffer = bp->pvData;
1499 }
1500
1501 return err;
1502}
1503
1504
1505/*
1506 ;_______________________________________________________________________
1507 ;
1508 ; Routine: ReleaseBitmapBlock
1509 ;
1510 ; Function: Relase a bitmap block.
1511 ;
1512 ; Inputs:
1513 ; vcb
1514 ; blockRef
1515 ; dirty
1516 ;_______________________________________________________________________
1517 */
1518static OSErr ReleaseBitmapBlock( ExtendedVCB *vcb, GenericLFBufPtr blockRef, Boolean dirty)
1519{
1520
1521 GenericLFBufPtr bp = blockRef;
1522
1523 if (blockRef == 0) {
1524 if (dirty)
1525 {
1526 LFHFS_LOG(LEVEL_ERROR, "ReleaseBitmapBlock: missing bp");
1527 hfs_assert(0);
1528 }
1529 return (0);
1530 }
1531
1532 if (bp)
1533 {
1534 if (dirty)
1535 {
1536 hfs_block_alloc_flags_t flags = (uint32_t)bp->uFlags;
1537
1538 if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED))
1539 {
1540 LFHFS_LOG(LEVEL_ERROR, "Modified read-only bitmap buffer!");
1541 hfs_assert(0);
1542 }
1543
1544 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1545 if (hfsmp->jnl)
1546 {
1547 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
1548 }
1549 else
1550 {
1551 lf_hfs_generic_buf_write(bp);
1552 lf_hfs_generic_buf_release(bp);
1553 }
1554 } else {
1555 lf_hfs_generic_buf_release(bp);
1556 }
1557 }
1558
1559 return (0);
1560}
1561
1562/*
1563 * ReleaseScanBitmapRange
1564 *
1565 * This is used to release struct bufs that were created for use by
1566 * bitmap scanning code. Because they may be of sizes different than the
1567 * typical runtime manipulation code, we want to force them to be purged out
1568 * of the buffer cache ASAP, so we'll release them differently than in the
1569 * ReleaseBitmapBlock case.
1570 *
1571 * Additionally, because we know that we're only reading the blocks and that they
1572 * should have been clean prior to reading them, we will never
1573 * issue a write to them (thus dirtying them).
1574 */
1575
1576static OSErr ReleaseScanBitmapRange( GenericLFBufPtr bp )
1577{
1578 if (bp)
1579 {
1580 lf_hfs_generic_buf_release(bp);
1581 }
1582
1583 return (0);
1584}
1585
1586/*
1587 * @extent.startBlock, on input, contains a preferred block for the
1588 * allocation. @extent.blockCount, on input, contains the minimum
1589 * number of blocks acceptable. Upon success, the result is conveyed
1590 * in @extent.
1591 */
1592static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp,
1593 HFSPlusExtentDescriptor *extent,
1594 uint32_t max_blocks,
1595 hfs_block_alloc_flags_t flags)
1596{
1597 OSErr err = dskFulErr;
1598
1599 const uint32_t min_blocks = extent->blockCount;
1600
1601 // It's > rather than >= because the last block is always reserved
1602 if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit
1603 && hfsmp->allocLimit - extent->startBlock > max_blocks) {
1604 /*
1605 * This is just checking to see if there's an extent starting
1606 * at extent->startBlock that will suit. We only check for
1607 * @max_blocks here; @min_blocks is ignored.
1608 */
1609
1610 err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks,
1611 max_blocks, max_blocks, true, true,
1612 &extent->startBlock, &extent->blockCount, flags);
1613
1614 if (err != dskFulErr)
1615 return err;
1616 }
1617
1618 err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock,
1619 &extent->blockCount);
1620
1621 if (!err) {
1622 if (extent->blockCount >= max_blocks)
1623 return 0;
1624 } else if (err != dskFulErr)
1625 return err;
1626
1627 // Try a more exhaustive search
1628 return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit,
1629 min_blocks, max_blocks,
1630 /* useMetaZone: */ true,
1631 /* trustSummary: */ true,
1632 &extent->startBlock, &extent->blockCount, flags);
1633}
1634
1635/*
1636 _______________________________________________________________________
1637
1638 Routine: BlockFindContig
1639
1640 Function: Find a contiguous group of allocation blocks. If the
1641 minimum cannot be satisfied, nothing is returned. The
1642 caller guarantees that there are enough free blocks
1643 (though they may not be contiguous, in which case this
1644 call will fail).
1645
1646 Inputs:
1647 vcb Pointer to volume where space is to be allocated
1648 startingBlock Preferred first block for allocation
1649 minBlocks Minimum number of contiguous blocks to allocate
1650 maxBlocks Maximum number of contiguous blocks to allocate
1651 flags
1652
1653 Outputs:
1654 actualStartBlock First block of range allocated, or 0 if error
1655 actualNumBlocks Number of blocks allocated, or 0 if error
1656 _______________________________________________________________________
1657 */
1658static OSErr BlockFindContig(
1659 ExtendedVCB *vcb,
1660 u_int32_t startingBlock,
1661 u_int32_t minBlocks,
1662 u_int32_t maxBlocks,
1663 hfs_block_alloc_flags_t flags,
1664 u_int32_t *actualStartBlock,
1665 u_int32_t *actualNumBlocks)
1666{
1667 OSErr retval = noErr;
1668 uint32_t currentStart = startingBlock;
1669
1670 uint32_t foundStart = 0; // values to emit to caller
1671 uint32_t foundCount = 0;
1672
1673 uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this
1674 uint32_t collision_count = 0;
1675
1676 int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
1677 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
1678
1679 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1680
1681 while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
1682
1683 /* Try and find something that works. */
1684
1685 /*
1686 * NOTE: If the only contiguous free extent of at least minBlocks
1687 * crosses startingBlock (i.e. starts before, ends after), then we
1688 * won't find it. Earlier versions *did* find this case by letting
1689 * the second search look past startingBlock by minBlocks. But
1690 * with the free extent cache, this can lead to duplicate entries
1691 * in the cache, causing the same blocks to be allocated twice.
1692 */
1693 retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
1694 maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags);
1695
1696 if (retval == dskFulErr && currentStart != 0) {
1697 /*
1698 * We constrain the endingBlock so we don't bother looking for ranges
1699 * that would overlap those found in the previous call, if the summary bitmap
1700 * is not on for this volume. If it is, then we assume that it was not trust
1701 * -worthy and do a full scan.
1702 */
1703 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1704 retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
1705 maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
1706 }
1707 else {
1708 retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
1709 maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags);
1710 }
1711 }
1712
1713 if (retval != noErr) {
1714 goto bailout;
1715 }
1716
1717 /* Do we overlap with the recently found collision extent? */
1718 if (collision_start) {
1719 if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
1720 /*
1721 * We've looped around, and the only thing we could use was the collision extent.
1722 * Since we are allowed to use it, go ahead and do so now.
1723 */
1724 if(allowReuse) {
1725 /*
1726 * then we couldn't find anything except values which might have been
1727 * recently deallocated. just return our cached value if we are allowed to.
1728 */
1729 foundStart = collision_start;
1730 foundCount = collision_count;
1731 goto bailout;
1732 }
1733 else {
1734 /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
1735 retval = dskFulErr;
1736 goto bailout;
1737 }
1738 }
1739 }
1740 /*
1741 * If we found something good, we'd break out of the loop at the top; foundCount
1742 * and foundStart should be set.
1743 */
1744
1745 } // end while loop.
1746
1747bailout:
1748
1749 if (retval == noErr) {
1750 *actualStartBlock = foundStart;
1751 *actualNumBlocks = foundCount;
1752 }
1753
1754 return retval;
1755
1756}
1757
1758
1759/*
1760 _______________________________________________________________________
1761
1762 Routine: BlockFindAny
1763
1764 Function: Find one or more allocation blocks and may return fewer than
1765 requested. The caller guarantees that there is at least one
1766 free block.
1767
1768 Inputs:
1769 vcb Pointer to volume where space is to be allocated
1770 startingBlock Preferred first block for allocation
1771 endingBlock Last block to check + 1
1772 maxBlocks Maximum number of contiguous blocks to allocate
1773 useMetaZone
1774
1775 Outputs:
1776 actualStartBlock First block of range allocated, or 0 if error
1777 actualNumBlocks Number of blocks allocated, or 0 if error
1778 _______________________________________________________________________
1779 */
1780
1781static OSErr BlockFindAny(
1782 ExtendedVCB *vcb,
1783 u_int32_t startingBlock,
1784 register u_int32_t endingBlock,
1785 u_int32_t maxBlocks,
1786 hfs_block_alloc_flags_t flags,
1787 Boolean trustSummary,
1788 u_int32_t *actualStartBlock,
1789 u_int32_t *actualNumBlocks)
1790{
1791
1792 /*
1793 * If it is enabled, scan through the summary table to find the first free block.
1794 *
1795 * If it reports that there are not any free blocks, we could have a false
1796 * positive, so in that case, use the input arguments as a pass through.
1797 */
1798 uint32_t start_blk = startingBlock;
1799 uint32_t end_blk = endingBlock;
1800 struct hfsmount *hfsmp;
1801 OSErr err;
1802
1803 hfsmp = (struct hfsmount*)vcb;
1804 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1805 uint32_t suggested_start;
1806
1807 /*
1808 * If the summary table is enabled, scan through it to find the first free
1809 * block. If there was an error, or we couldn't find anything free in the
1810 * summary table, then just leave the start_blk fields unmodified. We wouldn't
1811 * have gotten to this point if the mount point made it look like there was possibly
1812 * free space in the FS.
1813 */
1814 err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
1815 if (err == 0) {
1816 start_blk = suggested_start;
1817 }
1818 else {
1819 /* Differentiate between ENOSPC and a more esoteric error in the above call. */
1820 if ((err == ENOSPC) && (trustSummary)) {
1821 /*
1822 * The 'trustSummary' argument is for doing a full scan if we really
1823 * really, need the space and we think it's somewhere but can't find it in the
1824 * summary table. If it's true, then we trust the summary table and return
1825 * dskFulErr if we couldn't find it above.
1826 */
1827 return dskFulErr;
1828 }
1829 /*
1830 * If either trustSummary was false or we got a different errno, then we
1831 * want to fall through to the real bitmap single i/o code...
1832 */
1833 }
1834 }
1835
1836 err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
1837 flags, actualStartBlock, actualNumBlocks);
1838
1839 return err;
1840}
1841
1842
1843/*
1844 * BlockFindAnyBitmap finds free ranges by scanning the bitmap to
1845 * figure out where the free allocation blocks are. Inputs and
1846 * outputs are the same as for BlockFindAny.
1847 */
1848
1849static OSErr BlockFindAnyBitmap(
1850 ExtendedVCB *vcb,
1851 u_int32_t startingBlock,
1852 register u_int32_t endingBlock,
1853 u_int32_t maxBlocks,
1854 hfs_block_alloc_flags_t flags,
1855 u_int32_t *actualStartBlock,
1856 u_int32_t *actualNumBlocks)
1857{
1858 OSErr err;
1859 register u_int32_t block = 0; // current block number
1860 register u_int32_t currentWord; // Pointer to current word within bitmap block
1861 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
1862 register u_int32_t wordsLeft; // Number of words left in this bitmap block
1863 u_int32_t *buffer = NULL;
1864 u_int32_t *currCache = NULL;
1865 GenericLFBufPtr blockRef = 0;
1866 u_int32_t bitsPerBlock;
1867 u_int32_t wordsPerBlock;
1868 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1869 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
1870
1871 /*
1872 * When we're skipping the metadata zone and the start/end
1873 * range overlaps with the metadata zone then adjust the
1874 * start to be outside of the metadata zone. If the range
1875 * is entirely inside the metadata zone then we can deny the
1876 * request (dskFulErr).
1877 */
1878 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
1879 if (startingBlock <= vcb->hfs_metazone_end) {
1880 if (endingBlock > (vcb->hfs_metazone_end + 2))
1881 startingBlock = vcb->hfs_metazone_end + 1;
1882 else {
1883 err = dskFulErr;
1884 goto Exit;
1885 }
1886 }
1887 }
1888
1889 // Since this routine doesn't wrap around
1890 if (maxBlocks > (endingBlock - startingBlock)) {
1891 maxBlocks = endingBlock - startingBlock;
1892 }
1893
1894 //
1895 // Pre-read the first bitmap block
1896 //
1897 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags);
1898 if (err != noErr) goto Exit;
1899 buffer = currCache;
1900
1901 //
1902 // Set up the current position within the block
1903 //
1904 {
1905 u_int32_t wordIndexInBlock;
1906
1907 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
1908 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1909
1910 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1911 buffer += wordIndexInBlock;
1912 wordsLeft = wordsPerBlock - wordIndexInBlock;
1913 currentWord = SWAP_BE32 (*buffer);
1914 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
1915 }
1916
1917 /*
1918 * While loop 1:
1919 * Find the first unallocated block starting at 'block'
1920 */
1921 uint32_t summary_block_scan = 0;
1922
1923 block=startingBlock;
1924 while (block < endingBlock) {
1925 if ((currentWord & bitMask) == 0)
1926 break;
1927
1928 // Next bit
1929 ++block;
1930 bitMask >>= 1;
1931 if (bitMask == 0) {
1932 // Next word
1933 bitMask = kHighBitInWordMask;
1934 ++buffer;
1935
1936 if (--wordsLeft == 0) {
1937 // Next block
1938 buffer = currCache = NULL;
1939 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1940 /*
1941 * If summary_block_scan is non-zero, then we must have
1942 * pulled a bitmap file block into core, and scanned through
1943 * the entire thing. Because we're in this loop, we are
1944 * implicitly trusting that the bitmap didn't have any knowledge
1945 * about this particular block. As a result, update the bitmap
1946 * (lazily, now that we've scanned it) with our findings that
1947 * this particular block is completely used up.
1948 */
1949 if (summary_block_scan != 0) {
1950 uint32_t summary_bit;
1951 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
1952 hfs_set_summary (hfsmp, summary_bit, 1);
1953 }
1954 }
1955
1956 err = ReleaseBitmapBlock(vcb, blockRef, false);
1957 if (err != noErr) goto Exit;
1958
1959 /*
1960 * Skip over metadata blocks.
1961 */
1962 if (!useMetaZone) {
1963 block = NextBitmapBlock(vcb, block);
1964 }
1965 if (block >= endingBlock) {
1966 err = dskFulErr;
1967 goto Exit;
1968 }
1969
1970 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
1971 if (err != noErr) goto Exit;
1972 buffer = currCache;
1973 summary_block_scan = block;
1974 wordsLeft = wordsPerBlock;
1975 }
1976 currentWord = SWAP_BE32 (*buffer);
1977 }
1978 }
1979
1980 // Did we get to the end of the bitmap before finding a free block?
1981 // If so, then couldn't allocate anything.
1982 if (block >= endingBlock) {
1983 err = dskFulErr;
1984 goto Exit;
1985 }
1986
1987#if LF_HFS_CHECK_UNMAPPED
1988 /*
1989 * Don't move forward just yet. Verify that either one of the following
1990 * two conditions is true:
1991 * 1) journaling is not enabled
1992 * 2) block is not currently on any pending TRIM list.
1993 */
1994 if (hfsmp->jnl != NULL && (forceFlush == false)) {
1995 int recently_deleted = 0;
1996 uint32_t nextblk;
1997 err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
1998 if ((err == 0) && (recently_deleted)) {
1999
2000 /* release the bitmap block & unset currCache. we may jump past it. */
2001 err = ReleaseBitmapBlock(vcb, blockRef, false);
2002 currCache = NULL;
2003 if (err != noErr) {
2004 goto Exit;
2005 }
2006 /* set our start to nextblk, and re-do the search. */
2007 startingBlock = nextblk;
2008 goto restartSearchAny;
2009 }
2010 }
2011#endif
2012
2013 // Return the first block in the allocated range
2014 *actualStartBlock = block;
2015
2016 // If we could get the desired number of blocks before hitting endingBlock,
2017 // then adjust endingBlock so we won't keep looking. Ideally, the comparison
2018 // would be (block + maxBlocks) < endingBlock, but that could overflow. The
2019 // comparison below yields identical results, but without overflow.
2020 if (block < (endingBlock-maxBlocks)) {
2021 endingBlock = block + maxBlocks; // if we get this far, we've found enough
2022 }
2023
2024 /*
2025 * While loop 2:
2026 * Scan the bitmap, starting at 'currentWord' in the current
2027 * bitmap block. Continue iterating through the bitmap until
2028 * either we hit an allocated block, or until we have accumuluated
2029 * maxBlocks worth of bitmap.
2030 */
2031
2032 /* Continue until we see an allocated block */
2033 while ((currentWord & bitMask) == 0) {
2034 // Move to the next block. If no more, then exit.
2035 ++block;
2036 if (block == endingBlock) {
2037 break;
2038 }
2039
2040 // Next bit
2041 bitMask >>= 1;
2042 if (bitMask == 0) {
2043 // Next word
2044 bitMask = kHighBitInWordMask;
2045 ++buffer;
2046
2047 if (--wordsLeft == 0) {
2048 // Next block
2049 buffer = currCache = NULL;
2050
2051 /* We're only reading the bitmap here, so mark it as clean */
2052 err = ReleaseBitmapBlock(vcb, blockRef, false);
2053 if (err != noErr) {
2054 goto Exit;
2055 }
2056
2057 /*
2058 * Skip over metadata blocks.
2059 */
2060 if (!useMetaZone) {
2061 u_int32_t nextBlock;
2062 nextBlock = NextBitmapBlock(vcb, block);
2063 if (nextBlock != block) {
2064 goto Exit; /* allocation gap, so stop */
2065 }
2066 }
2067
2068 if (block >= endingBlock) {
2069 goto Exit;
2070 }
2071
2072 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags);
2073 if (err != noErr) {
2074 goto Exit;
2075 }
2076 buffer = currCache;
2077 wordsLeft = wordsPerBlock;
2078 }
2079 currentWord = SWAP_BE32 (*buffer);
2080 }
2081 }
2082
2083Exit:
2084 if (currCache) {
2085 /* Release the bitmap reference prior to marking bits in-use */
2086 (void) ReleaseBitmapBlock(vcb, blockRef, false);
2087 currCache = NULL;
2088 }
2089
2090 if (err == noErr) {
2091 *actualNumBlocks = block - *actualStartBlock;
2092
2093 // sanity check
2094 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
2095 LFHFS_LOG(LEVEL_ERROR, "BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN);
2096 hfs_assert(0);
2097 }
2098 }
2099 else {
2100 *actualStartBlock = 0;
2101 *actualNumBlocks = 0;
2102 }
2103
2104 return err;
2105}
2106
2107
2108/*
2109 _______________________________________________________________________
2110
2111 Routine: BlockFindKnown
2112
2113 Function: Return a potential extent from the free extent cache. The
2114 returned extent *must* be marked allocated and removed
2115 from the cache by the *caller*.
2116
2117 Inputs:
2118 vcb Pointer to volume where space is to be allocated
2119 maxBlocks Maximum number of contiguous blocks to allocate
2120
2121 Outputs:
2122 actualStartBlock First block of range allocated, or 0 if error
2123 actualNumBlocks Number of blocks allocated, or 0 if error
2124
2125 Returns:
2126 dskFulErr Free extent cache is empty
2127 _______________________________________________________________________
2128 */
2129
2130static OSErr BlockFindKnown(
2131 ExtendedVCB *vcb,
2132 u_int32_t maxBlocks,
2133 u_int32_t *actualStartBlock,
2134 u_int32_t *actualNumBlocks)
2135{
2136 OSErr err;
2137 u_int32_t foundBlocks;
2138 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2139
2140 hfs_lock_mount (hfsmp);
2141 lf_lck_spin_lock(&vcb->vcbFreeExtLock);
2142 if ( vcb->vcbFreeExtCnt == 0 ||
2143 vcb->vcbFreeExt[0].blockCount == 0) {
2144 lf_lck_spin_unlock(&vcb->vcbFreeExtLock);
2145 hfs_unlock_mount(hfsmp);
2146 return dskFulErr;
2147 }
2148 lf_lck_spin_unlock(&vcb->vcbFreeExtLock);
2149 hfs_unlock_mount(hfsmp);
2150
2151 lf_lck_spin_lock(&vcb->vcbFreeExtLock);
2152
2153 // Just grab up to maxBlocks of the first (largest) free exent.
2154 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2155 foundBlocks = vcb->vcbFreeExt[0].blockCount;
2156 if (foundBlocks > maxBlocks)
2157 foundBlocks = maxBlocks;
2158 *actualNumBlocks = foundBlocks;
2159
2160 lf_lck_spin_unlock(&vcb->vcbFreeExtLock);
2161
2162 // sanity check
2163 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
2164 {
2165 LFHFS_LOG(LEVEL_ERROR, "BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2166 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2167 err = EIO;
2168 } else
2169 err = 0;
2170
2171 return err;
2172}
2173
2174/*
2175 * BlockMarkAllocated
2176 *
2177 * This is a wrapper function around the internal calls which will actually mark the blocks
2178 * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
2179 * this logic here to avoid callers having to deal with whether or not the red-black tree
2180 * is enabled.
2181 */
2182
2183OSErr BlockMarkAllocated(
2184 ExtendedVCB *vcb,
2185 u_int32_t startingBlock,
2186 register u_int32_t numBlocks)
2187{
2188 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0);
2189}
2190
2191
2192/*
2193 _______________________________________________________________________
2194
2195 Routine: BlockMarkAllocatedInternal
2196
2197 Function: Mark a contiguous group of blocks as allocated (set in the
2198 bitmap). It assumes those bits are currently marked
2199 deallocated (clear in the bitmap). Note that this function
2200 must be called regardless of whether or not the bitmap or
2201 tree-based allocator is used, as all allocations must correctly
2202 be marked on-disk. If the tree-based approach is running, then
2203 this will be done before the node is removed from the tree.
2204
2205 Inputs:
2206 vcb Pointer to volume where space is to be allocated
2207 startingBlock First block number to mark as allocated
2208 numBlocks Number of blocks to mark as allocated
2209 _______________________________________________________________________
2210 */
2211static
2212OSErr BlockMarkAllocatedInternal (
2213 ExtendedVCB *vcb,
2214 u_int32_t startingBlock,
2215 u_int32_t numBlocks,
2216 hfs_block_alloc_flags_t flags)
2217{
2218 OSErr err;
2219 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2220 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2221 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2222 u_int32_t firstBit; // Bit index within word of first bit to allocate
2223 u_int32_t numBits; // Number of bits in word to allocate
2224 u_int32_t *buffer = NULL;
2225 GenericLFBufPtr blockRef = NULL;
2226 u_int32_t bitsPerBlock;
2227 u_int32_t wordsPerBlock;
2228 // XXXdbg
2229 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2230
2231#if DEBUG
2232
2233 if (!ISSET(flags, HFS_ALLOC_COMMIT)
2234 || ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) {
2235 struct rl_entry *range;
2236 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) {
2237 hfs_assert(rl_overlap(range, startingBlock,
2238 startingBlock + numBlocks - 1) == RL_NOOVERLAP);
2239 }
2240 }
2241
2242#endif
2243
2244#if LF_HFS_CHECK_UNMAPPED
2245 int force_flush = 0;
2246 /*
2247 * Since we are about to mark these bits as in-use
2248 * in the bitmap, decide if we need to alert the caller
2249 * that a journal flush might be appropriate. It's safe to
2250 * poke at the journal pointer here since we MUST have
2251 * called start_transaction by the time this function is invoked.
2252 * If the journal is enabled, then it will have taken the requisite
2253 * journal locks. If it is not enabled, then we have taken
2254 * a shared lock on the global lock.
2255 */
2256 if (hfsmp->jnl) {
2257 uint32_t ignore;
2258 err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
2259 if ((err == 0) && (force_flush)) {
2260 journal_request_immediate_flush (hfsmp->jnl);
2261 }
2262 }
2263
2264 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
2265#endif
2266
2267 /*
2268 * Don't make changes to the disk if we're just reserving. Note that
2269 * we could do better in the tentative case because we could, in theory,
2270 * avoid the journal flush above. However, that would mean that we would
2271 * need to catch the callback to stop it incorrectly addding the extent
2272 * to our free cache.
2273 */
2274 if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) {
2275 err = 0;
2276 goto Exit;
2277 }
2278
2279 //
2280 // Pre-read the bitmap block containing the first word of allocation
2281 //
2282
2283 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2284 HFS_ALLOC_IGNORE_RESERVED);
2285 if (err != noErr) goto Exit;
2286 //
2287 // Initialize currentWord, and wordsLeft.
2288 //
2289 {
2290 u_int32_t wordIndexInBlock;
2291
2292 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2293 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2294
2295 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2296 currentWord = buffer + wordIndexInBlock;
2297 wordsLeft = wordsPerBlock - wordIndexInBlock;
2298 }
2299
2300 // XXXdbg
2301 if (hfsmp->jnl) {
2302 journal_modify_block_start(hfsmp->jnl, blockRef);
2303 }
2304
2305 //
2306 // If the first block to allocate doesn't start on a word
2307 // boundary in the bitmap, then treat that first word
2308 // specially.
2309 //
2310
2311 firstBit = startingBlock % kBitsPerWord;
2312 if (firstBit != 0) {
2313 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2314 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2315 if (numBits > numBlocks) {
2316 numBits = numBlocks; // entire allocation is inside this one word
2317 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2318 }
2319#if DEBUG
2320 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2321 LFHFS_LOG(LEVEL_ERROR, "BlockMarkAllocatedInternal: blocks already allocated!");
2322 hfs_assert(0);
2323 }
2324#endif
2325 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2326 numBlocks -= numBits; // adjust number of blocks left to allocate
2327
2328 ++currentWord; // move to next word
2329 --wordsLeft; // one less word left in this block
2330 }
2331
2332 //
2333 // Allocate whole words (32 blocks) at a time.
2334 //
2335
2336 bitMask = kAllBitsSetInWord; // put this in a register for 68K
2337 while (numBlocks >= kBitsPerWord) {
2338 if (wordsLeft == 0) {
2339 // Read in the next bitmap block
2340 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2341
2342 buffer = NULL;
2343 err = ReleaseBitmapBlock(vcb, blockRef, true);
2344 if (err != noErr) goto Exit;
2345
2346 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2347 HFS_ALLOC_IGNORE_RESERVED);
2348 if (err != noErr) goto Exit;
2349
2350 // XXXdbg
2351 if (hfsmp->jnl) {
2352 journal_modify_block_start(hfsmp->jnl, blockRef);
2353 }
2354
2355 // Readjust currentWord and wordsLeft
2356 currentWord = buffer;
2357 wordsLeft = wordsPerBlock;
2358 }
2359#if DEBUG
2360 if (*currentWord != 0) {
2361 LFHFS_LOG(LEVEL_ERROR, "BlockMarkAllocatedInternal: blocks already allocated!");
2362 hfs_assert(0);
2363 }
2364#endif
2365 *currentWord = SWAP_BE32 (bitMask);
2366 numBlocks -= kBitsPerWord;
2367
2368 ++currentWord; // move to next word
2369 --wordsLeft; // one less word left in this block
2370 }
2371
2372 //
2373 // Allocate any remaining blocks.
2374 //
2375
2376 if (numBlocks != 0) {
2377 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2378 if (wordsLeft == 0) {
2379 // Read in the next bitmap block
2380 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2381
2382 buffer = NULL;
2383 err = ReleaseBitmapBlock(vcb, blockRef, true);
2384 if (err != noErr) goto Exit;
2385
2386 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2387 HFS_ALLOC_IGNORE_RESERVED);
2388 if (err != noErr) goto Exit;
2389 // XXXdbg
2390 if (hfsmp->jnl) {
2391 journal_modify_block_start(hfsmp->jnl, blockRef);
2392 }
2393 currentWord = buffer;
2394 }
2395#if DEBUG
2396 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
2397 LFHFS_LOG(LEVEL_ERROR, "BlockMarkAllocatedInternal: blocks already allocated!");
2398 hfs_assert(0);
2399 }
2400#endif
2401 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2402
2403 // No need to update currentWord or wordsLeft
2404 }
2405
2406Exit:
2407
2408 if (buffer)
2409 (void)ReleaseBitmapBlock(vcb, blockRef, true);
2410
2411 return err;
2412}
2413
2414
2415/*
2416 * BlockMarkFree
2417 *
2418 * This is a wrapper function around the internal calls which will actually mark the blocks
2419 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
2420 * this logic here to avoid callers having to deal with whether or not the red-black tree
2421 * is enabled.
2422 *
2423 */
2424OSErr BlockMarkFree(
2425 ExtendedVCB *vcb,
2426 u_int32_t startingBlock,
2427 register u_int32_t numBlocks)
2428{
2429 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2430}
2431
2432
2433/*
2434 * BlockMarkFreeUnused
2435 *
2436 * Scan the bitmap block beyond end of current file system for bits
2437 * that are marked as used. If any of the bits are marked as used,
2438 * this function marks them free.
2439 *
2440 * Note: This was specifically written to mark all bits beyond
2441 * end of current file system during hfs_extendfs(), which makes
2442 * sure that all the new blocks added to the file system are
2443 * marked as free. We expect that all the blocks beyond end of
2444 * current file system are always marked as free, but there might
2445 * be cases where are marked as used. This function assumes that
2446 * the number of blocks marked as used incorrectly are relatively
2447 * small, otherwise this can overflow journal transaction size
2448 * on certain file system configurations (example, large unused
2449 * bitmap with relatively small journal).
2450 *
2451 * Input:
2452 * startingBlock: First block of the range to mark unused
2453 * numBlocks: Number of blocks in the range to mark unused
2454 *
2455 * Returns: zero on success, non-zero on error.
2456 */
2457OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
2458{
2459 int error = 0;
2460 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2461 u_int32_t curNumBlocks;
2462 u_int32_t bitsPerBlock;
2463 u_int32_t lastBit;
2464
2465 /* Use the optimal bitmap I/O size instead of bitmap block size */
2466 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
2467
2468 /*
2469 * First clear any non bitmap allocation block aligned bits
2470 *
2471 * Calculate the first bit in the bitmap block next to
2472 * the bitmap block containing the bit for startingBlock.
2473 * Using this value, we calculate the total number of
2474 * bits to be marked unused from startingBlock to the
2475 * end of bitmap block containing startingBlock.
2476 */
2477 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
2478 curNumBlocks = lastBit - startingBlock;
2479 if (curNumBlocks > numBlocks) {
2480 curNumBlocks = numBlocks;
2481 }
2482 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2483 if (error) {
2484 return error;
2485 }
2486 startingBlock += curNumBlocks;
2487 numBlocks -= curNumBlocks;
2488
2489 /*
2490 * Check a full bitmap block for any 'used' bit. If any bit is used,
2491 * mark all the bits only in that bitmap block as free. This ensures
2492 * that we do not write unmodified bitmap blocks and do not
2493 * overwhelm the journal.
2494 *
2495 * The code starts by checking full bitmap block at a time, and
2496 * marks entire bitmap block as free only if any bit in that bitmap
2497 * block is marked as used. In the end, it handles the last bitmap
2498 * block which might be partially full by only checking till the
2499 * caller-specified last bit and if any bit is set, only mark that
2500 * range as free.
2501 */
2502 while (numBlocks) {
2503 if (numBlocks >= bitsPerBlock) {
2504 curNumBlocks = bitsPerBlock;
2505 } else {
2506 curNumBlocks = numBlocks;
2507 }
2508 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
2509 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2510 if (error) {
2511 return error;
2512 }
2513 }
2514 startingBlock += curNumBlocks;
2515 numBlocks -= curNumBlocks;
2516 }
2517
2518 return error;
2519}
2520
2521/*
2522 _______________________________________________________________________
2523
2524 Routine: BlockMarkFreeInternal
2525
2526 Function: Mark a contiguous group of blocks as free (clear in the
2527 bitmap). It assumes those bits are currently marked
2528 allocated (set in the bitmap).
2529
2530 Inputs:
2531 vcb Pointer to volume where space is to be freed
2532 startingBlock First block number to mark as freed
2533 numBlocks Number of blocks to mark as freed
2534 do_validate If true, validate that the blocks being
2535 deallocated to check if they are within totalBlocks
2536 for current volume and whether they were allocated
2537 before they are marked free.
2538 _______________________________________________________________________
2539 */
2540static
2541OSErr BlockMarkFreeInternal(
2542 ExtendedVCB *vcb,
2543 u_int32_t startingBlock_in,
2544 register u_int32_t numBlocks_in,
2545 Boolean do_validate)
2546{
2547 OSErr err;
2548 u_int32_t startingBlock = startingBlock_in;
2549 u_int32_t numBlocks = numBlocks_in;
2550 uint32_t unmapStart = startingBlock_in;
2551 uint32_t unmapCount = numBlocks_in;
2552 uint32_t wordIndexInBlock;
2553 u_int32_t *currentWord; // Pointer to current word within bitmap block
2554 u_int32_t wordsLeft; // Number of words left in this bitmap block
2555 u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2556 u_int32_t currentBit; // Bit index within word of current bit to allocate
2557 u_int32_t numBits; // Number of bits in word to allocate
2558 u_int32_t *buffer = NULL;
2559 GenericLFBufPtr blockRef = NULL;
2560 u_int32_t bitsPerBlock;
2561 u_int32_t wordsPerBlock;
2562 // XXXdbg
2563 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2564
2565 /*
2566 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
2567 * need to be able to free blocks being relocated during hfs_truncatefs.
2568 */
2569 if ((do_validate == true) &&
2570 (startingBlock + numBlocks > vcb->totalBlocks)) {
2571#if ALLOC_DEBUG || DEBUG
2572 LFHFS_LOG(LEVEL_ERROR, "lockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
2573 hfs_assert(0);
2574 __builtin_unreachable();
2575#else
2576 LFHFS_LOG(LEVEL_ERROR, "BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
2577 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2578 err = EIO;
2579 goto Exit;
2580#endif
2581 }
2582
2583 //
2584 // Pre-read the bitmap block containing the first word of allocation
2585 //
2586
2587 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2588 HFS_ALLOC_IGNORE_RESERVED);
2589 if (err != noErr) goto Exit;
2590
2591 // XXXdbg
2592 if (hfsmp->jnl) {
2593 journal_modify_block_start(hfsmp->jnl, blockRef);
2594 }
2595
2596 uint32_t min_unmap = 0, max_unmap = UINT32_MAX;
2597
2598 // Work out the bounds of any unmap we can send down
2599 struct rl_entry *range;
2600 for (int i = 0; i < 2; ++i) {
2601 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
2602 if (range->rl_start < startingBlock
2603 && range->rl_end >= min_unmap) {
2604 min_unmap = (uint32_t)(range->rl_end + 1);
2605 }
2606 if (range->rl_end >= startingBlock + numBlocks
2607 && range->rl_start < max_unmap) {
2608 max_unmap = (uint32_t)range->rl_start;
2609 }
2610 }
2611 }
2612
2613 //
2614 // Figure out how many bits and words per bitmap block.
2615 //
2616 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2617 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2618 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
2619
2620 //
2621 // Look for a range of free blocks immediately before startingBlock
2622 // (up to the start of the current bitmap block). Set unmapStart to
2623 // the first free block.
2624 //
2625 currentWord = buffer + wordIndexInBlock;
2626 currentBit = startingBlock % kBitsPerWord;
2627 bitMask = kHighBitInWordMask >> currentBit;
2628 while (unmapStart > min_unmap) {
2629 // Move currentWord/bitMask back by one bit
2630 bitMask <<= 1;
2631 if (bitMask == 0) {
2632 if (--currentWord < buffer)
2633 break;
2634 bitMask = kLowBitInWordMask;
2635 }
2636
2637 if (*currentWord & SWAP_BE32(bitMask))
2638 break; // Found an allocated block. Stop searching.
2639 --unmapStart;
2640 ++unmapCount;
2641 }
2642
2643 //
2644 // If the first block to free doesn't start on a word
2645 // boundary in the bitmap, then treat that first word
2646 // specially.
2647 //
2648
2649 currentWord = buffer + wordIndexInBlock;
2650 wordsLeft = wordsPerBlock - wordIndexInBlock;
2651 currentBit = startingBlock % kBitsPerWord;
2652 if (currentBit != 0) {
2653 bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit
2654 numBits = kBitsPerWord - currentBit; // number of remaining bits in this word
2655 if (numBits > numBlocks) {
2656 numBits = numBlocks; // entire allocation is inside this one word
2657 bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last
2658 }
2659 if ((do_validate == true) &&
2660 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2661 goto Corruption;
2662 }
2663 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2664 numBlocks -= numBits; // adjust number of blocks left to free
2665
2666 ++currentWord; // move to next word
2667 --wordsLeft; // one less word left in this block
2668 }
2669
2670 //
2671 // Free whole words (32 blocks) at a time.
2672 //
2673
2674 while (numBlocks >= kBitsPerWord) {
2675 if (wordsLeft == 0) {
2676 // Read in the next bitmap block
2677 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2678
2679 buffer = NULL;
2680 err = ReleaseBitmapBlock(vcb, blockRef, true);
2681 if (err != noErr) goto Exit;
2682
2683 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2684 HFS_ALLOC_IGNORE_RESERVED);
2685 if (err != noErr) goto Exit;
2686 // XXXdbg
2687 if (hfsmp->jnl) {
2688 journal_modify_block_start(hfsmp->jnl, blockRef);
2689 }
2690
2691 // Readjust currentWord and wordsLeft
2692 currentWord = buffer;
2693 wordsLeft = wordsPerBlock;
2694 }
2695 if ((do_validate == true) &&
2696 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
2697 goto Corruption;
2698 }
2699 *currentWord = 0; // clear the entire word
2700 numBlocks -= kBitsPerWord;
2701
2702 ++currentWord; // move to next word
2703 --wordsLeft; // one less word left in this block
2704 }
2705
2706 //
2707 // Free any remaining blocks.
2708 //
2709
2710 if (numBlocks != 0) {
2711 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2712 if (wordsLeft == 0) {
2713 // Read in the next bitmap block
2714 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
2715
2716 buffer = NULL;
2717 err = ReleaseBitmapBlock(vcb, blockRef, true);
2718 if (err != noErr) goto Exit;
2719
2720 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef,
2721 HFS_ALLOC_IGNORE_RESERVED);
2722 if (err != noErr) goto Exit;
2723
2724 // XXXdbg
2725 if (hfsmp->jnl) {
2726 journal_modify_block_start(hfsmp->jnl, blockRef);
2727 }
2728
2729 currentWord = buffer;
2730 }
2731 if ((do_validate == true) &&
2732 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
2733 goto Corruption;
2734 }
2735 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2736
2737 // No need to update currentWord or wordsLeft
2738 }
2739
2740 //
2741 // Look for a range of free blocks immediately after the range we just freed
2742 // (up to the end of the current bitmap block).
2743 //
2744 wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
2745 wordsLeft = wordsPerBlock - wordIndexInBlock;
2746 currentWord = buffer + wordIndexInBlock;
2747 currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
2748 bitMask = kHighBitInWordMask >> currentBit;
2749 while (unmapStart + unmapCount < max_unmap) {
2750 // Move currentWord/bitMask/wordsLeft forward one bit
2751 bitMask >>= 1;
2752 if (bitMask == 0) {
2753 if (--wordsLeft == 0)
2754 break;
2755 ++currentWord;
2756 bitMask = kHighBitInWordMask;
2757 }
2758
2759 if (*currentWord & SWAP_BE32(bitMask))
2760 break; // Found an allocated block. Stop searching.
2761 ++unmapCount;
2762 }
2763
2764Exit:
2765
2766 if (buffer)
2767 (void)ReleaseBitmapBlock(vcb, blockRef, true);
2768 return err;
2769
2770Corruption:
2771#if DEBUG
2772 LFHFS_LOG(LEVEL_ERROR, "BlockMarkFreeInternal: blocks not allocated!");
2773 hfs_assert(0);
2774 __builtin_unreachable();
2775#else
2776 LFHFS_LOG(LEVEL_ERROR, "BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n",
2777 vcb->vcbVN, startingBlock_in, numBlocks_in);
2778
2779 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
2780 err = EIO;
2781 goto Exit;
2782#endif
2783}
2784
2785
2786/*
2787 _______________________________________________________________________
2788
2789 Routine: BlockFindContiguous
2790
2791 Function: Find a contiguous range of blocks that are free (bits
2792 clear in the bitmap). If a contiguous range of the
2793 minimum size can't be found, an error will be returned.
2794 This is only needed to support the bitmap-scanning logic,
2795 as the red-black tree should be able to do this by internally
2796 searching its tree.
2797
2798 Inputs:
2799 vcb Pointer to volume where space is to be allocated
2800 startingBlock Preferred first block of range
2801 endingBlock Last possible block in range + 1
2802 minBlocks Minimum number of blocks needed. Must be > 0.
2803 maxBlocks Maximum (ideal) number of blocks desired
2804 useMetaZone OK to dip into metadata allocation zone
2805
2806 Outputs:
2807 actualStartBlock First block of range found, or 0 if error
2808 actualNumBlocks Number of blocks found, or 0 if error
2809
2810 Returns:
2811 noErr Found at least minBlocks contiguous
2812 dskFulErr No contiguous space found, or all less than minBlocks
2813 _______________________________________________________________________
2814 */
2815
2816static OSErr BlockFindContiguous(
2817 ExtendedVCB *vcb,
2818 u_int32_t startingBlock,
2819 u_int32_t endingBlock,
2820 u_int32_t minBlocks,
2821 u_int32_t maxBlocks,
2822 Boolean useMetaZone,
2823 Boolean trustSummary,
2824 u_int32_t *actualStartBlock,
2825 u_int32_t *actualNumBlocks,
2826 hfs_block_alloc_flags_t flags)
2827{
2828 OSErr err;
2829 register u_int32_t currentBlock; // Block we're currently looking at.
2830 u_int32_t firstBlock; // First free block in current extent.
2831 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
2832 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
2833 u_int32_t *buffer = NULL;
2834 register u_int32_t *currentWord;
2835 register u_int32_t bitMask;
2836 register u_int32_t wordsLeft;
2837 register u_int32_t tempWord;
2838 GenericLFBufPtr blockRef = 0;
2839 u_int32_t wordsPerBlock;
2840 struct hfsmount *hfsmp = (struct hfsmount*) vcb;
2841 HFSPlusExtentDescriptor best = { 0, 0 };
2842
2843 /*
2844 * When we're skipping the metadata zone and the start/end
2845 * range overlaps with the metadata zone then adjust the
2846 * start to be outside of the metadata zone. If the range
2847 * is entirely inside the metadata zone then we can deny the
2848 * request (dskFulErr).
2849 */
2850 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
2851 if (startingBlock <= vcb->hfs_metazone_end) {
2852 if (endingBlock > (vcb->hfs_metazone_end + 2))
2853 startingBlock = vcb->hfs_metazone_end + 1;
2854 else
2855 goto DiskFull;
2856 }
2857 }
2858
2859 if ((endingBlock - startingBlock) < minBlocks)
2860 {
2861 // The set of blocks we're checking is smaller than the minimum number
2862 // of blocks, so we couldn't possibly find a good range.
2863 goto DiskFull;
2864 }
2865
2866 stopBlock = endingBlock - minBlocks + 1;
2867 currentBlock = startingBlock;
2868 firstBlock = 0;
2869
2870 /*
2871 * Skip over metadata blocks.
2872 */
2873 if (!useMetaZone)
2874 currentBlock = NextBitmapBlock(vcb, currentBlock);
2875
2876 /*
2877 * Use the summary table if we can. Skip over any totally
2878 * allocated blocks. currentBlock should now point to the first
2879 * block beyond the metadata zone if the metazone allocations are not
2880 * allowed in this invocation.
2881 */
2882 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
2883 uint32_t suggestion;
2884 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
2885 if (err && err != ENOSPC)
2886 goto ErrorExit;
2887 if (err == ENOSPC || suggestion >= stopBlock)
2888 goto DiskFull;
2889 currentBlock = suggestion;
2890 }
2891
2892
2893 //
2894 // Pre-read the first bitmap block.
2895 //
2896 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
2897 if ( err != noErr ) goto ErrorExit;
2898
2899 //
2900 // Figure out where currentBlock is within the buffer.
2901 //
2902 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2903
2904 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
2905 currentWord = buffer + wordsLeft;
2906 wordsLeft = wordsPerBlock - wordsLeft;
2907
2908 uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks
2909 - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)
2910 ? 0 : hfsmp->tentativeBlocks));
2911
2912 /*
2913 * This outer do-while loop is the main body of this function. Its job is
2914 * to search through the blocks (until we hit 'stopBlock'), and iterate
2915 * through swaths of allocated bitmap until it finds free regions.
2916 */
2917
2918 do
2919 {
2920 foundBlocks = 0;
2921 /*
2922 * We will try and update the summary table as we search
2923 * below. Note that we will never update the summary table
2924 * for the first and last blocks that the summary table
2925 * covers. Ideally, we should, but the benefits probably
2926 * aren't that significant so we leave things alone for now.
2927 */
2928 uint32_t summary_block_scan = 0;
2929 /*
2930 * Inner while loop 1:
2931 * Look for free blocks, skipping over allocated ones.
2932 *
2933 * Initialization starts with checking the initial partial word
2934 * if applicable.
2935 */
2936 bitMask = currentBlock & kBitsWithinWordMask;
2937 if (bitMask)
2938 {
2939 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
2940 bitMask = kHighBitInWordMask >> bitMask;
2941 while (tempWord & bitMask)
2942 {
2943 bitMask >>= 1;
2944 ++currentBlock;
2945 }
2946
2947 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
2948 if (bitMask)
2949 goto FoundUnused;
2950
2951 // Didn't find any unused bits, so we're done with this word.
2952 ++currentWord;
2953 --wordsLeft;
2954 }
2955
2956 //
2957 // Check whole words
2958 //
2959 while (currentBlock < stopBlock)
2960 {
2961 // See if it's time to read another block.
2962 if (wordsLeft == 0)
2963 {
2964 buffer = NULL;
2965 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2966 /*
2967 * If summary_block_scan is non-zero, then we must have
2968 * pulled a bitmap file block into core, and scanned through
2969 * the entire thing. Because we're in this loop, we are
2970 * implicitly trusting that the bitmap didn't have any knowledge
2971 * about this particular block. As a result, update the bitmap
2972 * (lazily, now that we've scanned it) with our findings that
2973 * this particular block is completely used up.
2974 */
2975 if (summary_block_scan != 0) {
2976 uint32_t summary_bit;
2977 err = hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
2978 if (err != noErr) goto ErrorExit;
2979 hfs_set_summary (hfsmp, summary_bit, 1);
2980 }
2981 }
2982 err = ReleaseBitmapBlock(vcb, blockRef, false);
2983 if (err != noErr) goto ErrorExit;
2984
2985 /*
2986 * Skip over metadata blocks.
2987 */
2988 if (!useMetaZone) {
2989 currentBlock = NextBitmapBlock(vcb, currentBlock);
2990 if (currentBlock >= stopBlock) {
2991 goto LoopExit;
2992 }
2993 }
2994
2995 /* Skip over fully allocated bitmap blocks if we can */
2996 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
2997 uint32_t suggestion;
2998 err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion);
2999 if (err && err != ENOSPC)
3000 goto ErrorExit;
3001 if (err == ENOSPC || suggestion >= stopBlock)
3002 goto LoopExit;
3003 currentBlock = suggestion;
3004 }
3005
3006 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3007 if ( err != noErr ) goto ErrorExit;
3008
3009 /*
3010 * Set summary_block_scan to be the block we just read into the block cache.
3011 *
3012 * At this point, we've just read an allocation block worth of bitmap file
3013 * into the buffer above, but we don't know if it is completely allocated or not.
3014 * If we find that it is completely allocated/full then we will jump
3015 * through this loop again and set the appropriate summary bit as fully allocated.
3016 */
3017 summary_block_scan = currentBlock;
3018 currentWord = buffer;
3019 wordsLeft = wordsPerBlock;
3020 }
3021
3022 // See if any of the bits are clear
3023 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
3024 {
3025 // Figure out which bit is clear
3026 bitMask = kHighBitInWordMask;
3027 while (tempWord & bitMask)
3028 {
3029 bitMask >>= 1;
3030 ++currentBlock;
3031 }
3032
3033 break; // Found the free bit; break out to FoundUnused.
3034 }
3035
3036 // Keep looking at the next word
3037 currentBlock += kBitsPerWord;
3038 ++currentWord;
3039 --wordsLeft;
3040 }
3041
3042 FoundUnused:
3043 // Make sure the unused bit is early enough to use
3044 if (currentBlock >= stopBlock)
3045 {
3046 break;
3047 }
3048
3049 // Remember the start of the extent
3050 firstBlock = currentBlock;
3051
3052
3053 /*
3054 * Inner while loop 2:
3055 * We get here if we find a free block. Count the number
3056 * of contiguous free blocks observed.
3057 *
3058 * Initialization starts with checking the initial partial word
3059 * if applicable.
3060 */
3061 bitMask = currentBlock & kBitsWithinWordMask;
3062 if (bitMask)
3063 {
3064 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
3065 bitMask = kHighBitInWordMask >> bitMask;
3066 while (bitMask && !(tempWord & bitMask))
3067 {
3068 bitMask >>= 1;
3069 ++currentBlock;
3070 }
3071
3072 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3073 if (bitMask)
3074 goto FoundUsed;
3075
3076 // Didn't find any used bits, so we're done with this word.
3077 ++currentWord;
3078 --wordsLeft;
3079 }
3080
3081 //
3082 // Check whole words
3083 //
3084 while (currentBlock < endingBlock)
3085 {
3086 // See if it's time to read another block.
3087 if (wordsLeft == 0)
3088 {
3089 buffer = NULL;
3090 err = ReleaseBitmapBlock(vcb, blockRef, false);
3091 if (err != noErr) goto ErrorExit;
3092
3093 /*
3094 * Skip over metadata blocks.
3095 */
3096 if (!useMetaZone) {
3097 u_int32_t nextBlock;
3098
3099 nextBlock = NextBitmapBlock(vcb, currentBlock);
3100 if (nextBlock != currentBlock) {
3101 goto LoopExit; /* allocation gap, so stop */
3102 }
3103 }
3104
3105 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags);
3106 if ( err != noErr ) goto ErrorExit;
3107
3108 currentWord = buffer;
3109 wordsLeft = wordsPerBlock;
3110 }
3111
3112 // See if any of the bits are set
3113 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
3114 {
3115 // Figure out which bit is set
3116 bitMask = kHighBitInWordMask;
3117 while (!(tempWord & bitMask))
3118 {
3119 bitMask >>= 1;
3120 ++currentBlock;
3121 }
3122
3123 break; // Found the used bit; break out to FoundUsed.
3124 }
3125
3126 // Keep looking at the next word
3127 currentBlock += kBitsPerWord;
3128 ++currentWord;
3129 --wordsLeft;
3130
3131 // If we found at least maxBlocks, we can quit early.
3132 if ((currentBlock - firstBlock) >= maxBlocks)
3133 break;
3134 }
3135
3136 FoundUsed:
3137 // Make sure we didn't run out of bitmap looking for a used block.
3138 // If so, pin to the end of the bitmap.
3139 if (currentBlock > endingBlock)
3140 currentBlock = endingBlock;
3141
3142 // Figure out how many contiguous free blocks there were.
3143 // Pin the answer to maxBlocks.
3144 foundBlocks = currentBlock - firstBlock;
3145 if (foundBlocks > maxBlocks)
3146 foundBlocks = maxBlocks;
3147
3148 if (remaining) {
3149 if (foundBlocks > remaining) {
3150 LFHFS_LOG( LEVEL_DEBUG, "hfs: found more blocks than are indicated free!\n");
3151 remaining = UINT32_MAX;
3152 } else
3153 remaining -= foundBlocks;
3154 }
3155
3156 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3157 if (foundBlocks > best.blockCount) {
3158 best.startBlock = firstBlock;
3159 best.blockCount = foundBlocks;
3160 }
3161
3162 if (foundBlocks >= maxBlocks || best.blockCount >= remaining)
3163 break;
3164
3165 /*
3166 * Note that we will go ahead and add this free extent to our
3167 * cache below but that's OK because we'll remove it again if we
3168 * decide to use this extent.
3169 */
3170 } else if (foundBlocks >= minBlocks)
3171 break; // Found what we needed!
3172
3173 /*
3174 * We did not find the total blocks we were looking for, but
3175 * add this free block run to our free extent cache list, if possible.
3176 */
3177
3178 // If we're ignoring tentative ranges, we need to account for them here
3179 if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) {
3180 struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1);
3181 struct rl_entry *range;;
3182 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) {
3183 rl_subtract(&free_extent, range);
3184 if (rl_len(range) == 0)
3185 break;
3186 }
3187 firstBlock = (uint32_t)free_extent.rl_start;
3188 foundBlocks = (uint32_t)rl_len(&free_extent);
3189 }
3190 } while (currentBlock < stopBlock);
3191LoopExit:
3192
3193 if (ISSET(flags, HFS_ALLOC_TRY_HARD)) {
3194 firstBlock = best.startBlock;
3195 foundBlocks = best.blockCount;
3196 }
3197
3198 // Return the outputs.
3199 if (foundBlocks < minBlocks)
3200 {
3201 DiskFull:
3202 err = dskFulErr;
3203 ErrorExit:
3204 *actualStartBlock = 0;
3205 *actualNumBlocks = 0;
3206 }
3207 else
3208 {
3209 err = noErr;
3210 *actualStartBlock = firstBlock;
3211 *actualNumBlocks = foundBlocks;
3212 /*
3213 * Sanity check for overflow
3214 */
3215 if ((firstBlock + foundBlocks) > vcb->allocLimit) {
3216 LFHFS_LOG(LEVEL_ERROR, "blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
3217 vcb->vcbVN, startingBlock, endingBlock, currentBlock,
3218 firstBlock, stopBlock, minBlocks, foundBlocks);
3219 hfs_assert(0);
3220 }
3221 }
3222
3223 if (buffer)
3224 (void) ReleaseBitmapBlock(vcb, blockRef, false);
3225
3226 return err;
3227}
3228
3229
3230/*
3231 * Count number of bits set in the given 32-bit unsigned number
3232 *
3233 * Returns:
3234 * Number of bits set
3235 */
3236static int num_bits_set(u_int32_t num)
3237{
3238 return __builtin_popcount(num);
3239}
3240
3241/*
3242 * For a given range of blocks, find the total number of blocks
3243 * allocated. If 'stop_on_first' is true, it stops as soon as it
3244 * encounters the first allocated block. This option is useful
3245 * to determine if any block is allocated or not.
3246 *
3247 * Inputs:
3248 * startingBlock First allocation block number of the range to be scanned.
3249 * numBlocks Total number of blocks that need to be scanned.
3250 * stop_on_first Stop the search after the first allocated block is found.
3251 *
3252 * Output:
3253 * allocCount Total number of allocation blocks allocated in the given range.
3254 *
3255 * On error, it is the number of allocated blocks found
3256 * before the function got an error.
3257 *
3258 * If 'stop_on_first' is set,
3259 * allocCount = 1 if any allocated block was found.
3260 * allocCount = 0 if no allocated block was found.
3261 *
3262 * Returns:
3263 * 0 on success, non-zero on failure.
3264 */
3265static int
3266hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
3267 u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
3268{
3269 u_int32_t *currentWord; // Pointer to current word within bitmap block
3270 u_int32_t wordsLeft; // Number of words left in this bitmap block
3271 u_int32_t bitMask; // Word with given bits already set (ready to test)
3272 u_int32_t firstBit; // Bit index within word of first bit to allocate
3273 u_int32_t numBits; // Number of bits in word to allocate
3274 u_int32_t *buffer = NULL;
3275 GenericLFBufPtr blockRef;
3276 u_int32_t bitsPerBlock;
3277 u_int32_t wordsPerBlock;
3278 u_int32_t blockCount = 0;
3279 int error;
3280
3281 /*
3282 * Pre-read the bitmap block containing the first word of allocation
3283 */
3284 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
3285 HFS_ALLOC_IGNORE_TENTATIVE);
3286 if (error)
3287 goto JustReturn;
3288
3289 /*
3290 * Initialize currentWord, and wordsLeft.
3291 */
3292 {
3293 u_int32_t wordIndexInBlock;
3294
3295 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3296 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
3297
3298 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3299 currentWord = buffer + wordIndexInBlock;
3300 wordsLeft = wordsPerBlock - wordIndexInBlock;
3301 }
3302
3303 /*
3304 * First test any non word aligned bits.
3305 */
3306 firstBit = startingBlock % kBitsPerWord;
3307 if (firstBit != 0) {
3308 bitMask = kAllBitsSetInWord >> firstBit;
3309 numBits = kBitsPerWord - firstBit;
3310 if (numBits > numBlocks) {
3311 numBits = numBlocks;
3312 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
3313 }
3314 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3315 if (stop_on_first) {
3316 blockCount = 1;
3317 goto Exit;
3318 }
3319 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3320 }
3321 numBlocks -= numBits;
3322 ++currentWord;
3323 --wordsLeft;
3324 }
3325
3326 /*
3327 * Test whole words (32 blocks) at a time.
3328 */
3329 while (numBlocks >= kBitsPerWord) {
3330 if (wordsLeft == 0) {
3331 /* Read in the next bitmap block. */
3332 startingBlock += bitsPerBlock;
3333
3334 buffer = NULL;
3335 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3336 if (error) goto Exit;
3337
3338 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
3339 HFS_ALLOC_IGNORE_TENTATIVE);
3340 if (error) goto Exit;
3341
3342 /* Readjust currentWord and wordsLeft. */
3343 currentWord = buffer;
3344 wordsLeft = wordsPerBlock;
3345 }
3346 if (*currentWord != 0) {
3347 if (stop_on_first) {
3348 blockCount = 1;
3349 goto Exit;
3350 }
3351 blockCount += num_bits_set(*currentWord);
3352 }
3353 numBlocks -= kBitsPerWord;
3354 ++currentWord;
3355 --wordsLeft;
3356 }
3357
3358 /*
3359 * Test any remaining blocks.
3360 */
3361 if (numBlocks != 0) {
3362 bitMask = ~(kAllBitsSetInWord >> numBlocks);
3363 if (wordsLeft == 0) {
3364 /* Read in the next bitmap block */
3365 startingBlock += bitsPerBlock;
3366
3367 buffer = NULL;
3368 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3369 if (error) goto Exit;
3370
3371 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef,
3372 HFS_ALLOC_IGNORE_TENTATIVE);
3373 if (error) goto Exit;
3374
3375 currentWord = buffer;
3376 }
3377 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3378 if (stop_on_first) {
3379 blockCount = 1;
3380 goto Exit;
3381 }
3382 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3383 }
3384 }
3385Exit:
3386 if (buffer) {
3387 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
3388 }
3389 if (allocCount) {
3390 *allocCount = blockCount;
3391 }
3392
3393JustReturn:
3394
3395 return (error);
3396}
3397
3398/*
3399 * Count total number of blocks that are allocated in the given
3400 * range from the bitmap. This is used to preflight total blocks
3401 * that need to be relocated during volume resize.
3402 *
3403 * The journal or allocation file lock must be held.
3404 *
3405 * Returns:
3406 * 0 on success, non-zero on failure.
3407 * On failure, allocCount is zero.
3408 */
3409int
3410hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
3411 u_int32_t numBlocks, u_int32_t *allocCount)
3412{
3413 return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
3414}
3415
3416/*
3417 * Test to see if any blocks in a range are allocated.
3418 *
3419 * Note: On error, this function returns 1, which means that
3420 * one or more blocks in the range are allocated. This function
3421 * is primarily used for volume resize and we do not want
3422 * to report to the caller that the blocks are free when we
3423 * were not able to deterministically find it out. So on error,
3424 * we always report that the blocks are allocated.
3425 *
3426 * The journal or allocation file lock must be held.
3427 *
3428 * Returns
3429 * 0 if all blocks in the range are free.
3430 * 1 if blocks in the range are allocated, or there was an error.
3431 */
3432int
3433hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
3434{
3435 int error;
3436 u_int32_t allocCount;
3437
3438 error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
3439 if (error) {
3440 /* On error, we always say that the blocks are allocated
3441 * so that volume resize does not return false success.
3442 */
3443 return 1;
3444 } else {
3445 /* The function was deterministically able to find out
3446 * if there was any block allocated or not. In that case,
3447 * the value in allocCount is good enough to be returned
3448 * back to the caller.
3449 */
3450 return allocCount;
3451 }
3452}
3453
3454/*
3455 * CONFIG_HFS_RBTREE
3456 * Check to see if the red-black tree is live. Allocation file lock must be held
3457 * shared or exclusive to call this function. Note that we may call this even if
3458 * HFS is built without activating the red-black tree code.
3459 */
3460int
3461hfs_isrbtree_active(struct hfsmount *hfsmp){
3462
3463#pragma unused (hfsmp)
3464
3465 /* Just return 0 for now */
3466 return 0;
3467}
3468
3469
3470
3471/* Summary Table Functions */
3472/*
3473 * hfs_check_summary:
3474 *
3475 * This function should be used to query the summary table to see if we can
3476 * bypass a bitmap block or not when we're trying to find a free allocation block.
3477 *
3478 *
3479 * Inputs:
3480 * allocblock - allocation block number. Will be used to infer the correct summary bit.
3481 * hfsmp -- filesystem in question.
3482 *
3483 * Output Arg:
3484 * *freeblocks - set to 1 if we believe at least one free blocks in this vcbVBMIOSize
3485 * page of bitmap file.
3486 *
3487 *
3488 * Returns:
3489 * 0 on success
3490 * EINVAL on error
3491 *
3492 */
3493
3494static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
3495
3496 int err = EINVAL;
3497 if (hfsmp->vcbVBMIOSize) {
3498 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3499 uint32_t index;
3500 if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
3501 *freeblocks = 0;
3502 return EINVAL;
3503 }
3504
3505 /* Ok, now that we have the bit index into the array, what byte is it in ? */
3506 uint32_t byteindex = index / kBitsPerByte;
3507 uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
3508 uint8_t bit_in_byte = index % kBitsPerByte;
3509
3510 if (current_byte & (1 << bit_in_byte)) {
3511 /*
3512 * We do not believe there is anything free in the
3513 * entire vcbVBMIOSize'd block.
3514 */
3515 *freeblocks = 0;
3516 }
3517 else {
3518 /* Looks like there might be a free block here... */
3519 *freeblocks = 1;
3520 }
3521 }
3522 err = 0;
3523 }
3524
3525 return err;
3526}
3527
3528/*
3529 * hfs_release_summary
3530 *
3531 * Given an extent that is about to be de-allocated on-disk, determine the number
3532 * of summary bitmap bits that need to be marked as 'potentially available'.
3533 * Then go ahead and mark them as free.
3534 *
3535 * Inputs:
3536 * hfsmp - hfs mount
3537 * block - starting allocation block.
3538 * length - length of the extent.
3539 *
3540 * Returns:
3541 * EINVAL upon any errors.
3542 */
3543static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
3544 int err = EINVAL;
3545 uint32_t end_blk = (start_blk + length) - 1;
3546
3547 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3548 /* Figure out what the starting / ending block's summary bits are */
3549 uint32_t start_bit;
3550 uint32_t end_bit;
3551 uint32_t current_bit;
3552
3553 err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
3554 if (err) {
3555 goto release_err;
3556 }
3557 err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
3558 if (err) {
3559 goto release_err;
3560 }
3561
3562 if (ALLOC_DEBUG) {
3563 if (start_bit > end_bit) {
3564 LFHFS_LOG(LEVEL_ERROR, "hfs_release_summary: start > end!, %d %d ", start_bit, end_bit);
3565 hfs_assert(0);
3566 }
3567 }
3568 current_bit = start_bit;
3569 while (current_bit <= end_bit) {
3570 err = hfs_set_summary (hfsmp, current_bit, 0);
3571 current_bit++;
3572 }
3573 }
3574
3575release_err:
3576 return err;
3577}
3578
3579/*
3580 * hfs_find_summary_free
3581 *
3582 * Given a allocation block as input, returns an allocation block number as output as a
3583 * suggestion for where to start scanning the bitmap in order to find free blocks. It will
3584 * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
3585 * bit, then keep iterating over the summary bits in order to find the first free one.
3586 *
3587 * Inputs:
3588 * hfsmp - hfs mount
3589 * block - starting allocation block
3590 * newblock - output block as suggestion
3591 *
3592 * Returns:
3593 * 0 on success
3594 * ENOSPC if we could not find a free block
3595 */
3596
3597int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
3598
3599 int err = ENOSPC;
3600 uint32_t bit_index = 0;
3601 uint32_t maybe_has_blocks = 0;
3602
3603 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3604 uint32_t byte_index;
3605 uint8_t curbyte;
3606 uint8_t bit_in_byte;
3607 uint32_t summary_cap;
3608
3609 /*
3610 * We generate a cap for the summary search because the summary table
3611 * always represents a full summary of the bitmap FILE, which may
3612 * be way more bits than are necessary for the actual filesystem
3613 * whose allocations are mapped by the bitmap.
3614 *
3615 * Compute how much of hfs_summary_size is useable for the given number
3616 * of allocation blocks eligible on this FS.
3617 */
3618 err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap);
3619 if (err) {
3620 goto summary_exit;
3621 }
3622
3623 /* Check the starting block first */
3624 err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
3625 if (err) {
3626 goto summary_exit;
3627 }
3628
3629 if (maybe_has_blocks) {
3630 /*
3631 * It looks like the initial start block could have something.
3632 * Short-circuit and just use that.
3633 */
3634 *newblock = block;
3635 goto summary_exit;
3636 }
3637
3638 /*
3639 * OK, now we know that the first block was useless.
3640 * Get the starting summary bit, and find it in the array
3641 */
3642 maybe_has_blocks = 0;
3643 err = hfs_get_summary_index (hfsmp, block, &bit_index);
3644 if (err) {
3645 goto summary_exit;
3646 }
3647
3648 /* Iterate until we find something. */
3649 while (bit_index <= summary_cap) {
3650 byte_index = bit_index / kBitsPerByte;
3651 curbyte = hfsmp->hfs_summary_table[byte_index];
3652 bit_in_byte = bit_index % kBitsPerByte;
3653
3654 if (curbyte & (1 << bit_in_byte)) {
3655 /* nothing here. increment and move on */
3656 bit_index++;
3657 }
3658 else {
3659 /*
3660 * found something! convert bit_index back into
3661 * an allocation block for use. 'newblock' will now
3662 * contain the proper allocation block # based on the bit
3663 * index.
3664 */
3665 err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
3666 if (err) {
3667 goto summary_exit;
3668 }
3669 maybe_has_blocks = 1;
3670 break;
3671 }
3672 }
3673
3674 /* If our loop didn't find anything, set err to ENOSPC */
3675 if (maybe_has_blocks == 0) {
3676 err = ENOSPC;
3677 }
3678 }
3679
3680 /* If the summary table is not active for this mount, we'll just return ENOSPC */
3681summary_exit:
3682 if (maybe_has_blocks) {
3683 err = 0;
3684 }
3685
3686 return err;
3687}
3688
3689/*
3690 * hfs_get_summary_allocblock
3691 *
3692 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
3693 *
3694 * Inputs:
3695 * hfsmp - hfs mount
3696 * summarybit - summmary bit index
3697 * *alloc - allocation block number in the bitmap file.
3698 *
3699 * Output:
3700 * 0 on success
3701 * EINVAL on failure
3702 */
3703int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
3704 summarybit, uint32_t *alloc) {
3705 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3706 uint32_t allocblk;
3707
3708 allocblk = summarybit * bits_per_iosize;
3709
3710 if (allocblk >= hfsmp->totalBlocks) {
3711 return EINVAL;
3712 }
3713 else {
3714 *alloc = allocblk;
3715 }
3716
3717 return 0;
3718}
3719
3720
3721/*
3722 * hfs_set_summary:
3723 *
3724 * This function should be used to manipulate the summary table
3725 *
3726 * The argument 'inuse' will set the value of the bit in question to one or zero
3727 * depending on its value.
3728 *
3729 * Inputs:
3730 * hfsmp - hfs mount
3731 * summarybit - the bit index into the summary table to set/unset.
3732 * inuse - the value to assign to the bit.
3733 *
3734 * Returns:
3735 * 0 on success
3736 * EINVAL on error
3737 *
3738 */
3739
3740static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
3741
3742 int err = EINVAL;
3743 if (hfsmp->vcbVBMIOSize) {
3744 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3745
3746 if (ALLOC_DEBUG) {
3747 if (hfsmp->hfs_summary_table == NULL) {
3748 LFHFS_LOG(LEVEL_ERROR, "hfs_set_summary: no table for %p ", hfsmp);
3749 hfs_assert(0);
3750 }
3751 }
3752
3753 /* Ok, now that we have the bit index into the array, what byte is it in ? */
3754 uint32_t byte_index = summarybit / kBitsPerByte;
3755 uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
3756 uint8_t bit_in_byte = summarybit % kBitsPerByte;
3757
3758 if (inuse) {
3759 current_byte = (current_byte | (1 << bit_in_byte));
3760 }
3761 else {
3762 current_byte = (current_byte & ~(1 << bit_in_byte));
3763 }
3764
3765 hfsmp->hfs_summary_table[byte_index] = current_byte;
3766 }
3767 err = 0;
3768 }
3769
3770 return err;
3771}
3772
3773
3774/*
3775 * hfs_get_summary_index:
3776 *
3777 * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
3778 * of IO against the bitmap file.
3779 *
3780 * Returns:
3781 * 0 on success
3782 * EINVAL on failure
3783 */
3784static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
3785 uint32_t summary_bit;
3786 uint32_t bits_per_iosize;
3787 int err = EINVAL;
3788
3789 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3790 /* Is the input block bigger than the total number of blocks? */
3791 if (block >= hfsmp->totalBlocks) {
3792 return EINVAL;
3793 }
3794
3795 /* Is there even a vbmIOSize set? */
3796 if (hfsmp->vcbVBMIOSize == 0) {
3797 return EINVAL;
3798 }
3799
3800 bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3801
3802 summary_bit = block / bits_per_iosize;
3803
3804 *index = summary_bit;
3805 err = 0;
3806 }
3807
3808 return err;
3809}
3810
3811/*
3812 * hfs_init_summary
3813 *
3814 * From a given mount structure, compute how big the summary table should be for the given
3815 * filesystem, then allocate and bzero the memory.
3816 *
3817 * Returns:
3818 * 0 on success
3819 * EINVAL on failure
3820 */
3821int
3822hfs_init_summary (struct hfsmount *hfsmp) {
3823
3824 uint32_t summary_size;
3825 uint32_t summary_size_bytes;
3826 uint8_t *summary_table;
3827
3828 if (hfsmp->hfs_allocation_cp == NULL) {
3829 if (ALLOC_DEBUG) {
3830 LFHFS_LOG(LEVEL_DEBUG, "hfs_init_summary: summary table cannot progress without a bitmap cnode! \n");
3831 }
3832 return EINVAL;
3833 }
3834 /*
3835 * The practical maximum size of the summary table is 16KB:
3836 *
3837 * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
3838 *
3839 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
3840 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
3841 * supporting this much worse. The math would instead look like this:
3842 * (512MB / 2k) / 8 == 32k.
3843 *
3844 * So, we will disallow the summary table if the allocation block size is < 4k.
3845 */
3846
3847 if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
3848 LFHFS_LOG(LEVEL_ERROR, "hfs_init_summary: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
3849 return EINVAL;
3850 }
3851
3852 summary_size = hfsmp->hfs_allocation_cp->c_blocks;
3853
3854 if (ALLOC_DEBUG) {
3855 LFHFS_LOG(LEVEL_DEBUG, "HFS Summary Table Initialization: Bitmap %u blocks\n",
3856 hfsmp->hfs_allocation_cp->c_blocks);
3857 }
3858
3859 /*
3860 * If the bitmap IO size is not the same as the allocation block size then
3861 * then re-compute the number of summary bits necessary. Note that above, the
3862 * the default size is the number of allocation blocks in the bitmap *FILE*
3863 * (not the number of bits in the bitmap itself). If the allocation block size
3864 * is large enough though, we may need to increase this.
3865 */
3866 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
3867 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
3868 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
3869
3870 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
3871 summary_size = (uint32_t) lrg_size;
3872 }
3873
3874 /*
3875 * If the block size is the same as the IO Size, then the total number of blocks
3876 * is already equal to the number of IO units, which is our number of summary bits.
3877 */
3878
3879 summary_size_bytes = summary_size / kBitsPerByte;
3880 /* Always add one byte, just in case we have a dangling number of bits */
3881 summary_size_bytes++;
3882
3883 if (ALLOC_DEBUG) {
3884 LFHFS_LOG(LEVEL_DEBUG, "HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
3885 LFHFS_LOG(LEVEL_DEBUG, "HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
3886
3887
3888 }
3889
3890 /* Store the field in the mount point */
3891 hfsmp->hfs_summary_size = summary_size;
3892 hfsmp->hfs_summary_bytes = summary_size_bytes;
3893
3894 summary_table = hfs_mallocz(summary_size_bytes);
3895
3896 /* enable the summary table */
3897 hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
3898 hfsmp->hfs_summary_table = summary_table;
3899
3900 if (ALLOC_DEBUG) {
3901 if (hfsmp->hfs_summary_table == NULL) {
3902 LFHFS_LOG(LEVEL_ERROR, "HFS Summary Init: no table for %p\n", hfsmp);
3903 hfs_assert(0);
3904 }
3905 }
3906 return 0;
3907}
3908
3909#if ALLOC_DEBUG
3910/*
3911 * hfs_validate_summary
3912 *
3913 * Validation routine for the summary table. Debug-only function.
3914 *
3915 * Bitmap lock must be held.
3916 *
3917 */
3918void hfs_validate_summary (struct hfsmount *hfsmp) {
3919 uint32_t i;
3920 int err;
3921
3922 /*
3923 * Iterate over all of the bits in the summary table, and verify if
3924 * there really are free blocks in the pages that we believe may
3925 * may contain free blocks.
3926 */
3927
3928 if (hfsmp->hfs_summary_table == NULL) {
3929 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: No HFS summary table!");
3930 hfs_assert(0);
3931 }
3932
3933 /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. we add 1 byte for slop */
3934 if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
3935 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: Size is bad! %d", hfsmp->hfs_summary_size);
3936 hfs_assert(0);
3937 }
3938
3939 if (hfsmp->vcbVBMIOSize == 0) {
3940 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: no VCB VBM IO Size !");
3941 hfs_assert(0);
3942 }
3943
3944 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: summary validation beginning on %s\n", hfsmp->vcbVN);
3945 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);
3946
3947 /* iterate through all possible summary bits */
3948 for (i = 0; i < hfsmp->hfs_summary_size ; i++) {
3949
3950 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3951 uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;
3952
3953 /* Compute the corresponding allocation block for the summary bit. */
3954 uint32_t alloc_block = i * bits_per_iosize;
3955
3956 /*
3957 * We use a uint32_t pointer here because it will speed up
3958 * access to the real bitmap data on disk.
3959 */
3960 uint32_t *block_data;
3961 struct buf *bp;
3962 int counter;
3963 int counter_max;
3964 int saw_free_bits = 0;
3965
3966 /* Get the block */
3967 if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) {
3968 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: error (%d) in ReadBitmapRange!", err);
3969 hfs_assert(0);
3970 }
3971
3972 /* Query the status of the bit and then make sure we match */
3973 uint32_t maybe_has_free_blocks;
3974 err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
3975 if (err) {
3976 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: hfs_check_summary returned error (%d) ", err);
3977 hfs_assert(0);
3978 }
3979 counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;
3980
3981 for (counter = 0; counter < counter_max; counter++) {
3982 uint32_t word = block_data[counter];
3983
3984 /* We assume that we'll not find any free bits here. */
3985 if (word != kAllBitsSetInWord) {
3986 if (maybe_has_free_blocks) {
3987 /* All done */
3988 saw_free_bits = 1;
3989 break;
3990 } else {
3991 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: hfs_check_summary saw free bits!");
3992 hfs_assert(0);
3993 }
3994 }
3995 }
3996
3997 if (maybe_has_free_blocks && (saw_free_bits == 0)) {
3998 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: did not see free bits !");
3999 hfs_assert(0);
4000 }
4001
4002 /* Release the block. */
4003 if ((err = ReleaseScanBitmapRange (bp))) {
4004 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: Error (%d) in ReleaseScanBitmapRange", err);
4005 hfs_assert(0);
4006 }
4007 }
4008
4009 LFHFS_LOG(LEVEL_ERROR, "hfs_validate_summary: summary validation completed successfully on %s\n", hfsmp->vcbVN);
4010 return;
4011}
4012#endif
4013
4014/*
4015 * hfs_alloc_scan_range:
4016 *
4017 * This function should be used to scan large ranges of the allocation bitmap
4018 * at one time. It makes two key assumptions:
4019 *
4020 * 1) Bitmap lock is held during the duration of the call (exclusive)
4021 * 2) There are no pages in the buffer cache for any of the bitmap
4022 * blocks that we may encounter. It *MUST* be completely empty.
4023 *
4024 * The expected use case is when we are scanning the bitmap in full while we are
4025 * still mounting the filesystem in order to issue TRIMs or build up the summary
4026 * table for the mount point. It should be done after any potential journal replays
4027 * are completed and their I/Os fully issued.
4028 *
4029 * The key reason for assumption (2) above is that this function will try to issue
4030 * I/O against the bitmap file in chunks as large a possible -- essentially as
4031 * much as the buffer layer will handle (1MB). Because the size of these I/Os
4032 * is larger than what would be expected during normal runtime we must invalidate
4033 * the buffers as soon as we are done with them so that they do not persist in
4034 * the buffer cache for other threads to find, as they'll typically be doing
4035 * allocation-block size I/Os instead.
4036 *
4037 * Input Args:
4038 * hfsmp - hfs mount data structure
4039 * startbit - allocation block # to start our scan. It must be aligned
4040 * on a vcbVBMIOsize boundary.
4041 * list - journal trim list data structure for issuing TRIMs
4042 *
4043 * Output Args:
4044 * bitToScan - Return the next bit to scan if this function is called again.
4045 * Caller will supply this into the next invocation
4046 * of this call as 'startbit'.
4047 */
4048
4049static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4050 u_int32_t *bitToScan, struct jnl_trim_list *list) {
4051
4052 int error;
4053 int readwrite = 1;
4054 u_int32_t curAllocBlock;
4055 GenericLFBufPtr blockRef = NULL;
4056 u_int32_t *buffer = NULL;
4057 u_int32_t free_offset = 0; //tracks the start of the current free range
4058 u_int32_t size = 0; // tracks the length of the current free range.
4059 u_int32_t iosize = 0; //how much io we should generate against the bitmap
4060 u_int32_t byte_off; // byte offset into the bitmap file.
4061 u_int32_t completed_size; // how much io was actually completed
4062 u_int32_t last_bitmap_block;
4063 u_int32_t current_word;
4064 u_int32_t word_index = 0;
4065
4066 /* summary table building */
4067 uint32_t summary_bit = 0;
4068 uint32_t saw_free_blocks = 0;
4069 uint32_t last_marked = 0;
4070
4071 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4072 readwrite = 0;
4073 }
4074
4075 /*
4076 * Compute how much I/O we should generate here.
4077 * hfs_scan_range_size will validate that the start bit
4078 * converted into a byte offset into the bitmap file,
4079 * is aligned on a VBMIOSize boundary.
4080 */
4081 error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4082 if (error) {
4083 if (ALLOC_DEBUG) {
4084 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4085 hfs_assert(0);
4086 }
4087 return error;
4088 }
4089
4090 if (iosize < hfsmp->vcbVBMIOSize) {
4091 if (ALLOC_DEBUG) {
4092 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4093 hfs_assert(0);
4094 }
4095 return EINVAL;
4096 }
4097
4098 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
4099 byte_off = startbit / kBitsPerByte;
4100
4101 /*
4102 * When the journal replays blocks, it does so by writing directly to the disk
4103 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
4104 * it also immediately re-reads and invalidates the range covered by the bp so
4105 * it does not leave anything lingering in the cache (for iosize reasons).
4106 *
4107 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4108 *
4109 * NOTE: It is not recommended, but it is possible to call the function below
4110 * on sections of the bitmap that may be in core already as long as the pages are not
4111 * dirty. In that case, we'd notice that something starting at that
4112 * logical block of the bitmap exists in the metadata cache, and we'd check
4113 * if the iosize requested is the same as what was already allocated for it.
4114 * Odds are pretty good we're going to request something larger. In that case,
4115 * we just free the existing memory associated with the buf and reallocate a
4116 * larger range. This function should immediately invalidate it as soon as we're
4117 * done scanning, so this shouldn't cause any coherency issues.
4118 */
4119
4120 error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4121 if (error) {
4122 if (ALLOC_DEBUG) {
4123 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4124 hfs_assert(0);
4125 }
4126 return error;
4127 }
4128
4129 /*
4130 * At this point, we have a giant wired buffer that represents some portion of
4131 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
4132 * bytes though, so clip our ending bit to what we actually read in.
4133 */
4134 completed_size = blockRef->uValidBytes;
4135 last_bitmap_block = completed_size * kBitsPerByte;
4136 last_bitmap_block = last_bitmap_block + startbit;
4137
4138 /* Cap the last block to the total number of blocks if required */
4139 if (last_bitmap_block > hfsmp->totalBlocks) {
4140 last_bitmap_block = hfsmp->totalBlocks;
4141 }
4142
4143 /* curAllocBlock represents the logical block we're analyzing. */
4144 curAllocBlock = startbit;
4145 word_index = 0;
4146 size = 0;
4147
4148 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4149 if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4150 error = EINVAL;
4151 if (ALLOC_DEBUG) {
4152 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4153 hfs_assert(0);
4154 }
4155 return error;
4156 }
4157 /*
4158 * summary_bit should now be set to the summary bit corresponding to
4159 * the allocation block of the first bit that we're supposed to scan
4160 */
4161 }
4162 saw_free_blocks = 0;
4163
4164 while (curAllocBlock < last_bitmap_block) {
4165 u_int32_t bit;
4166
4167 /* Update the summary table as needed */
4168 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4169 if (ALLOC_DEBUG) {
4170 if (hfsmp->hfs_summary_table == NULL) {
4171 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: no summary table!");
4172 hfs_assert(0);
4173 }
4174 }
4175
4176 uint32_t temp_summary;
4177 error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4178 if (error) {
4179 if (ALLOC_DEBUG) {
4180 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4181 hfs_assert(0);
4182 }
4183 return EINVAL;
4184 }
4185
4186 if (ALLOC_DEBUG) {
4187 if (temp_summary < summary_bit) {
4188 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: backwards summary bit?\n");
4189 hfs_assert(0);
4190 }
4191 }
4192
4193 /*
4194 * If temp_summary is greater than summary_bit, then this
4195 * means that the next allocation block crosses a vcbVBMIOSize boundary
4196 * and we should treat this range of on-disk data as part of a new summary
4197 * bit.
4198 */
4199 if (temp_summary > summary_bit) {
4200 if (saw_free_blocks == 0) {
4201 /* Mark the bit as totally consumed in the summary table */
4202 hfs_set_summary (hfsmp, summary_bit, 1);
4203 }
4204 else {
4205 /* Mark the bit as potentially free in summary table */
4206 hfs_set_summary (hfsmp, summary_bit, 0);
4207 }
4208 last_marked = summary_bit;
4209 /*
4210 * Any time we set the summary table, update our counter which tracks
4211 * what the last bit that was fully marked in the summary table.
4212 *
4213 * Then reset our marker which says we haven't seen a free bit yet.
4214 */
4215 saw_free_blocks = 0;
4216 summary_bit = temp_summary;
4217 }
4218 } /* End summary table conditions */
4219
4220 current_word = SWAP_BE32(buffer[word_index]);
4221 /* Iterate through the word 1 bit at a time... */
4222 for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
4223 if (curAllocBlock >= last_bitmap_block) {
4224 break;
4225 }
4226 u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
4227
4228 if (allocated) {
4229 if (size != 0) {
4230 if (readwrite) {
4231 /* Insert the previously tracked range of free blocks to the trim list */
4232 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4233 }
4234 add_free_extent_cache (hfsmp, free_offset, size);
4235 size = 0;
4236 free_offset = 0;
4237 }
4238 }
4239 else {
4240 /* Not allocated */
4241 size++;
4242 if (free_offset == 0) {
4243 /* Start a new run of free spcae at curAllocBlock */
4244 free_offset = curAllocBlock;
4245 }
4246 if (saw_free_blocks == 0) {
4247 saw_free_blocks = 1;
4248 }
4249 }
4250 } /* end for loop iterating through the word */
4251
4252 if (curAllocBlock < last_bitmap_block) {
4253 word_index++;
4254 }
4255
4256 } /* End while loop (iterates through last_bitmap_block) */
4257
4258
4259 /*
4260 * We've (potentially) completed our pass through this region of bitmap,
4261 * but one thing we may not have done is updated that last summary bit for
4262 * the last page we scanned, because we would have never transitioned across
4263 * a vcbVBMIOSize boundary again. Check for that and update the last bit
4264 * as needed.
4265 *
4266 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
4267 * for the region of bitmap on-disk that we were scanning. (it is one greater).
4268 */
4269 if ((curAllocBlock >= last_bitmap_block) &&
4270 (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
4271 uint32_t temp_summary;
4272 /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
4273 uint32_t temp_block = last_bitmap_block - 1;
4274 error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
4275 if (error) {
4276 if (ALLOC_DEBUG) {
4277 LFHFS_LOG(LEVEL_ERROR, "hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
4278 hfs_assert(0);
4279 }
4280 return EINVAL;
4281 }
4282
4283 /* Did we already update this in the table? */
4284 if (temp_summary > last_marked) {
4285 if (saw_free_blocks == 0) {
4286 hfs_set_summary (hfsmp, temp_summary, 1);
4287 }
4288 else {
4289 hfs_set_summary (hfsmp, temp_summary, 0);
4290 }
4291 }
4292 }
4293
4294 /*
4295 * We may have been tracking a range of free blocks that hasn't been inserted yet.
4296 * Keep the logic for the TRIM and free extent separate from that of the summary
4297 * table management even though they are closely linked.
4298 */
4299 if (size != 0) {
4300 if (readwrite) {
4301 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4302 }
4303 add_free_extent_cache (hfsmp, free_offset, size);
4304 }
4305
4306 /*
4307 * curAllocBlock represents the next block we need to scan when we return
4308 * to this function.
4309 */
4310 *bitToScan = curAllocBlock;
4311 ReleaseScanBitmapRange(blockRef);
4312
4313 return 0;
4314
4315}
4316
4317
4318
4319/*
4320 * Compute the maximum I/O size to generate against the bitmap file
4321 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
4322 *
4323 * Inputs:
4324 * hfsmp -- hfsmount to look at
4325 * bitmap_off -- bit offset into the bitmap file
4326 *
4327 * Outputs:
4328 * iosize -- iosize to generate.
4329 *
4330 * Returns:
4331 * 0 on success; EINVAL otherwise
4332 */
4333static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
4334
4335 /*
4336 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
4337 * with 32 bit math in this function.
4338 */
4339
4340 uint32_t bitmap_len;
4341 uint32_t remaining_bitmap;
4342 uint32_t target_iosize;
4343 uint32_t bitmap_off;
4344
4345 /* Is this bit index not word aligned? If so, immediately fail. */
4346 if (bitmap_st % kBitsPerWord) {
4347 if (ALLOC_DEBUG) {
4348 LFHFS_LOG(LEVEL_ERROR, "hfs_scan_range_size: unaligned start bit! bitmap_st %d \n", bitmap_st);
4349 hfs_assert(0);
4350 }
4351 return EINVAL;
4352 }
4353
4354 /* bitmap_off is in bytes, not allocation blocks/bits */
4355 bitmap_off = bitmap_st / kBitsPerByte;
4356
4357 if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
4358 if (ALLOC_DEBUG) {
4359 LFHFS_LOG(LEVEL_ERROR, "hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
4360 hfs_assert(0);
4361 }
4362 return EINVAL;
4363 }
4364
4365 /*
4366 * Also invalid if it's not at least aligned to HFS bitmap logical
4367 * block boundaries. We don't have to emit an iosize that's an
4368 * exact multiple of the VBMIOSize, but it must start on such
4369 * a boundary.
4370 *
4371 * The vcbVBMIOSize may be SMALLER than the allocation block size
4372 * on a FS with giant allocation blocks, but it will never be
4373 * greater than it, so it should be safe to start I/O
4374 * aligned on a VBMIOsize boundary.
4375 */
4376 if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
4377 if (ALLOC_DEBUG) {
4378 LFHFS_LOG(LEVEL_ERROR, "hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
4379 hfs_assert(0);
4380 }
4381 return EINVAL;
4382 }
4383
4384 /*
4385 * Generate the total bitmap file length in bytes, then round up
4386 * that value to the end of the last allocation block, if needed (It
4387 * will probably be needed). We won't scan past the last actual
4388 * allocation block.
4389 *
4390 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
4391 * have to complete the I/O on VBMIOSize boundaries, but we can only read
4392 * up until the end of the bitmap file.
4393 */
4394 bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8;
4395
4396 remaining_bitmap = bitmap_len - bitmap_off;
4397
4398 /*
4399 * io size is the MIN of the maximum I/O we can generate or the
4400 * remaining amount of bitmap.
4401 */
4402 target_iosize = MIN((MAXBSIZE), remaining_bitmap);
4403 *iosize = target_iosize;
4404
4405 return 0;
4406}
4407
4408/*
4409 * Remove an extent from the list of free extents.
4410 *
4411 * This is a low-level routine. It does not handle overlaps or splitting;
4412 * that is the responsibility of the caller. The input extent must exactly
4413 * match an extent already in the list; it will be removed, and any following
4414 * extents in the list will be shifted up.
4415 *
4416 * Inputs:
4417 * startBlock - Start of extent to remove
4418 * blockCount - Number of blocks in extent to remove
4419 *
4420 * Result:
4421 * The index of the extent that was removed.
4422 */
4423static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
4424{
4425 if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
4426 if (ALLOC_DEBUG)
4427 {
4428 LFHFS_LOG(LEVEL_ERROR, "remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4429 hfs_assert(0);
4430 }
4431 else
4432 LFHFS_LOG(LEVEL_ERROR, "remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4433 return;
4434 }
4435 int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
4436 if (shift_count > 0) {
4437 memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4438 }
4439 hfsmp->vcbFreeExtCnt--;
4440}
4441
4442
4443/*
4444 * Add an extent to the list of free extents.
4445 *
4446 * This is a low-level routine. It does not handle overlaps or coalescing;
4447 * that is the responsibility of the caller. This routine *does* make
4448 * sure that the extent it is adding is inserted in the correct location.
4449 * If the list is full, this routine will handle either removing the last
4450 * extent in the list to make room for the new extent, or ignoring the
4451 * new extent if it is "worse" than the last extent in the list.
4452 *
4453 * Inputs:
4454 * startBlock - Start of extent to add
4455 * blockCount - Number of blocks in extent to add
4456 *
4457 * Result:
4458 * The index where the extent that was inserted, or kMaxFreeExtents
4459 * if the extent was not inserted (the list was full, and the extent
4460 * being added was "worse" than everything in the list).
4461 */
4462static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4463{
4464 uint32_t i;
4465
4466 /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
4467 if (ALLOC_DEBUG) {
4468 uint32_t endBlock = startBlock + blockCount;
4469 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4470 if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
4471 startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
4472 continue;
4473 }
4474 LFHFS_LOG(LEVEL_ERROR, "add_free_extent_list: extent(%u %u) overlaps existing extent (%u %u) at index %d",
4475 startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
4476 hfs_assert(0);
4477 }
4478 }
4479
4480 /* Figure out what index the new extent should be inserted at. */
4481 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4482 /* The list is sorted by decreasing size. */
4483 if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
4484 break;
4485 }
4486 }
4487
4488 /* When we get here, i is the index where the extent should be inserted. */
4489 if (i == kMaxFreeExtents) {
4490 /*
4491 * The new extent is worse than anything already in the list,
4492 * and the list is full, so just ignore the extent to be added.
4493 */
4494 return i;
4495 }
4496
4497 /*
4498 * Grow the list (if possible) to make room for an insert.
4499 */
4500 if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
4501 hfsmp->vcbFreeExtCnt++;
4502
4503 /*
4504 * If we'll be keeping any extents after the insert position, then shift them.
4505 */
4506 int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
4507 if (shift_count > 0) {
4508 memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4509 }
4510
4511 /* Finally, store the new extent at its correct position. */
4512 hfsmp->vcbFreeExt[i].startBlock = startBlock;
4513 hfsmp->vcbFreeExt[i].blockCount = blockCount;
4514 return i;
4515}
4516
4517
4518/*
4519 * Remove an entry from free extent cache after it has been allocated.
4520 *
4521 * This is a high-level routine. It handles removing a portion of a
4522 * cached extent, potentially splitting it into two (if the cache was
4523 * already full, throwing away the extent that would sort last). It
4524 * also handles removing an extent that overlaps multiple extents in
4525 * the cache.
4526 *
4527 * Inputs:
4528 * hfsmp - mount point structure
4529 * startBlock - starting block of the extent to be removed.
4530 * blockCount - number of blocks of the extent to be removed.
4531 */
4532static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4533{
4534 u_int32_t i, insertedIndex;
4535 u_int32_t currentStart, currentEnd, endBlock;
4536 int extentsRemoved = 0;
4537
4538 endBlock = startBlock + blockCount;
4539
4540 lf_lck_spin_lock(&hfsmp->vcbFreeExtLock);
4541
4542 /*
4543 * Iterate over all of the extents in the free extent cache, removing or
4544 * updating any entries that overlap with the input extent.
4545 */
4546 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4547 currentStart = hfsmp->vcbFreeExt[i].startBlock;
4548 currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
4549
4550 /*
4551 * If the current extent is entirely before or entirely after the
4552 * the extent to be removed, then we keep it as-is.
4553 */
4554 if (currentEnd <= startBlock || currentStart >= endBlock) {
4555 continue;
4556 }
4557
4558 /*
4559 * If the extent being removed entirely contains the current extent,
4560 * then remove the current extent.
4561 */
4562 if (startBlock <= currentStart && endBlock >= currentEnd) {
4563 remove_free_extent_list(hfsmp, i);
4564
4565 /*
4566 * We just removed the extent at index i. The extent at
4567 * index i+1 just got shifted to index i. So decrement i
4568 * to undo the loop's "++i", and the next iteration will
4569 * examine index i again, which contains the next extent
4570 * in the list.
4571 */
4572 --i;
4573 ++extentsRemoved;
4574 continue;
4575 }
4576
4577 /*
4578 * If the extent being removed is strictly "in the middle" of the
4579 * current extent, then we need to split the current extent into
4580 * two discontiguous extents (the "head" and "tail"). The good
4581 * news is that we don't need to examine any other extents in
4582 * the list.
4583 */
4584 if (startBlock > currentStart && endBlock < currentEnd) {
4585 remove_free_extent_list(hfsmp, i);
4586 add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4587 add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4588 break;
4589 }
4590
4591 /*
4592 * The only remaining possibility is that the extent to be removed
4593 * overlaps the start or end (but not both!) of the current extent.
4594 * So we need to replace the current extent with a shorter one.
4595 *
4596 * The only tricky part is that the updated extent might be at a
4597 * different index than the original extent. If the updated extent
4598 * was inserted after the current extent, then we need to re-examine
4599 * the entry at index i, since it now contains the extent that was
4600 * previously at index i+1. If the updated extent was inserted
4601 * before or at the same index as the removed extent, then the
4602 * following extents haven't changed position.
4603 */
4604 remove_free_extent_list(hfsmp, i);
4605 if (startBlock > currentStart) {
4606 /* Remove the tail of the current extent. */
4607 insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4608 } else {
4609 /* Remove the head of the current extent. */
4610 insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4611 }
4612 if (insertedIndex > i) {
4613 --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */
4614 }
4615 }
4616
4617 lf_lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4618 sanity_check_free_ext(hfsmp, 0);
4619
4620 return;
4621}
4622
4623
4624/*
4625 * Add an entry to free extent cache after it has been deallocated.
4626 *
4627 * This is a high-level routine. It will merge overlapping or contiguous
4628 * extents into a single, larger extent.
4629 *
4630 * If the extent provided has blocks beyond current allocLimit, it is
4631 * clipped to allocLimit (so that we won't accidentally find and allocate
4632 * space beyond allocLimit).
4633 *
4634 * Inputs:
4635 * hfsmp - mount point structure
4636 * startBlock - starting block of the extent to be removed.
4637 * blockCount - number of blocks of the extent to be removed.
4638 *
4639 * Returns:
4640 * true - if the extent was added successfully to the list
4641 * false - if the extent was not added to the list, maybe because
4642 * the extent was beyond allocLimit, or is not best
4643 * candidate to be put in the cache.
4644 */
4645static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4646{
4647 Boolean retval = false;
4648 uint32_t endBlock;
4649 uint32_t currentEnd;
4650 uint32_t i;
4651
4652#if DEBUG
4653 for (i = 0; i < 2; ++i) {
4654 struct rl_entry *range;
4655 TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) {
4656 hfs_assert(rl_overlap(range, startBlock,
4657 startBlock + blockCount - 1) == RL_NOOVERLAP);
4658 }
4659 }
4660#endif
4661
4662 /* No need to add extent that is beyond current allocLimit */
4663 if (startBlock >= hfsmp->allocLimit) {
4664 goto out_not_locked;
4665 }
4666
4667 /* If end of the free extent is beyond current allocLimit, clip the extent */
4668 if ((startBlock + blockCount) > hfsmp->allocLimit) {
4669 blockCount = hfsmp->allocLimit - startBlock;
4670 }
4671
4672 lf_lck_spin_lock(&hfsmp->vcbFreeExtLock);
4673
4674 /*
4675 * Make a pass through the free extent cache, looking for known extents that
4676 * overlap or are contiguous with the extent to be added. We'll remove those
4677 * extents from the cache, and incorporate them into the new extent to be added.
4678 */
4679 endBlock = startBlock + blockCount;
4680 for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
4681 currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
4682 if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
4683 /* Extent i does not overlap and is not contiguous, so keep it. */
4684 continue;
4685 } else {
4686 /* We need to remove extent i and combine it with the input extent. */
4687 if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
4688 startBlock = hfsmp->vcbFreeExt[i].startBlock;
4689 if (currentEnd > endBlock)
4690 endBlock = currentEnd;
4691
4692 remove_free_extent_list(hfsmp, i);
4693 /*
4694 * We just removed the extent at index i. The extent at
4695 * index i+1 just got shifted to index i. So decrement i
4696 * to undo the loop's "++i", and the next iteration will
4697 * examine index i again, which contains the next extent
4698 * in the list.
4699 */
4700 --i;
4701 }
4702 }
4703 add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
4704
4705 lf_lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4706
4707out_not_locked:
4708 sanity_check_free_ext(hfsmp, 0);
4709
4710 return retval;
4711}
4712
4713/* Debug function to check if the free extent cache is good or not */
4714static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
4715{
4716 u_int32_t i, j;
4717
4718 /* Do not do anything if debug is not on */
4719 if (ALLOC_DEBUG == 0) {
4720 return;
4721 }
4722
4723 lf_lck_spin_lock(&hfsmp->vcbFreeExtLock);
4724
4725 if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
4726 {
4727 LFHFS_LOG(LEVEL_ERROR, "sanity_check_free_ext: free extent count (%u) is too large", hfsmp->vcbFreeExtCnt);
4728 }
4729
4730 /*
4731 * Iterate the Free extent cache and ensure no entries are bogus or refer to
4732 * allocated blocks.
4733 */
4734 for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
4735 u_int32_t start, nblocks;
4736
4737 start = hfsmp->vcbFreeExt[i].startBlock;
4738 nblocks = hfsmp->vcbFreeExt[i].blockCount;
4739
4740 /* Check if any of the blocks in free extent cache are allocated.
4741 * This should not be enabled always because it might take
4742 * very long for large extents that get added to the list.
4743 *
4744 * We have to drop vcbFreeExtLock while we call hfs_isallocated
4745 * because it is going to do I/O. Note that the free extent
4746 * cache could change. That's a risk we take when using this
4747 * debugging code. (Another alternative would be to try to
4748 * detect when the free extent cache changed, and perhaps
4749 * restart if the list changed while we dropped the lock.)
4750 */
4751 if (check_allocated) {
4752 lf_lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4753 if (hfs_isallocated(hfsmp, start, nblocks)) {
4754 LFHFS_LOG(LEVEL_ERROR, "sanity_check_free_ext: slot %d:(%u,%u) in the free extent array is allocated\n",
4755 i, start, nblocks);
4756 hfs_assert(0);
4757 }
4758 lf_lck_spin_lock(&hfsmp->vcbFreeExtLock);
4759 }
4760
4761 /* Check if any part of the extent is beyond allocLimit */
4762 if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
4763 LFHFS_LOG(LEVEL_ERROR, "sanity_check_free_ext: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
4764 i, start, nblocks, hfsmp->allocLimit);
4765 hfs_assert(0);
4766 }
4767
4768 /* Check if there are any duplicate start blocks */
4769 for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
4770 if (start == hfsmp->vcbFreeExt[j].startBlock) {
4771 LFHFS_LOG(LEVEL_ERROR, "sanity_check_free_ext: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
4772 i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
4773 hfsmp->vcbFreeExt[j].blockCount);
4774 hfs_assert(0);
4775 }
4776 }
4777
4778 /* Check if the entries are out of order */
4779 if ((i+1) != hfsmp->vcbFreeExtCnt) {
4780 /* normally sorted by block count (descending) */
4781 if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
4782 LFHFS_LOG(LEVEL_ERROR, "sanity_check_free_ext: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
4783 i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
4784 hfsmp->vcbFreeExt[i+1].blockCount);
4785 hfs_assert(0);
4786 }
4787 }
4788 }
4789 lf_lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4790}