[apple/xnu.git] bsd/hfs/hfscommon/Misc/VolumeAllocation.c (xnu-2782.20.48)
1c79356b 1/*
04b8595b 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 File: VolumeAllocation.c
30
31 Contains: Routines for accessing and modifying the volume bitmap.
32
33 Version: HFS Plus 1.0
34
316670eb 35 	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.
0b4e3aa0 36
37*/
38
39/*
40Public routines:
41 BlockAllocate
42 Allocate space on a volume. Can allocate space contiguously.
43 If not contiguous, then allocation may be less than what was
44 asked for. Returns the starting block number, and number of
45 blocks. (Will only do a single extent???)
46 BlockDeallocate
47 Deallocate a contiguous run of allocation blocks.
48
49 BlockMarkAllocated
50 Exported wrapper to mark blocks as in-use. This will correctly determine
51 whether or not the red-black tree is enabled and call the appropriate function
52 if applicable.
1c79356b 53 BlockMarkFree
54 Exported wrapper to mark blocks as freed. This will correctly determine whether or
55 not the red-black tree is enabled and call the appropriate function if applicable.
56
57
58 ResetVCBFreeExtCache
59 Since the red-black tree obviates the need to maintain the free extent cache, we do
60 not update it if the tree is also live. As a result, if we ever need to destroy the trees
61 we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
62 bitmap scanning allocator.
63 We also reset and disable the free extent cache when volume resizing is
64 in flight.
65
66 UpdateAllocLimit
67 Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
68 allocations from occupying space in the region we are modifying during a filesystem resize.
69 At other times, it should be consistent with the total number of allocation blocks in the
70 filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
71 know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
72 number of items in the tree that we can allocate from.
316670eb 73
74 ScanUnmapBlocks
75 Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it
76 tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core
77 summary table of the allocation bitmap.
78
79Internal routines:
6d2010ae 80 BlockMarkFreeInternal
1c79356b 81 Mark a contiguous range of blocks as free. The corresponding
82 bits in the volume bitmap will be cleared. This will actually do the work
83 of modifying the bitmap for us.
84
6d2010ae 85 BlockMarkAllocatedInternal
86 Mark a contiguous range of blocks as allocated. The cor-
87 responding bits in the volume bitmap are set. Also tests to see
88 if any of the blocks were previously unallocated.
89 BlockFindContiguous
90 Find a contiguous range of blocks of a given size. The caller
91 specifies where to begin the search (by block number). The
92 block number of the first block in the range is returned. This is only
93 called by the bitmap scanning logic as the red-black tree should be able
94 to do this internally by searching its tree.
95 BlockAllocateAny
96 Find and allocate a contiguous range of blocks up to a given size. The
97 first range of contiguous free blocks found are allocated, even if there
98 are fewer blocks than requested (and even if a contiguous range of blocks
99 of the given size exists elsewhere).
100 BlockAllocateAnyBitmap
101 Finds a range of blocks per the above requirements without using the
102 Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
103 any valid range of free space needed.
104 BlockAllocateContig
105 Find and allocate a contiguous range of blocks of a given size. If
106 a contiguous range of free blocks of the given size isn't found, then
39236c6e 107 the allocation fails (i.e. it is "all or nothing").
108 BlockAllocateKnown
109 Try to allocate space from known free space in the volume's
110 free extent cache.
111 ReadBitmapBlock
112 Given an allocation block number, read the bitmap block that
113 contains that allocation block into a caller-supplied buffer.
114
115 ReleaseBitmapBlock
116 Release a bitmap block back into the buffer cache.
316670eb 117
118 ReadBitmapRange
119 Given an allocation block number, read a range of bitmap that
120 must begin at that allocation block into a caller supplied buffer.
121
122 ReleaseBitmapRange
123 Release and invalidate a buf_t corresponding to the bitmap
124 back into the UBC in order to prevent coherency issues.
125
126 remove_free_extent_cache
127 Remove an extent from the free extent cache. Handles overlaps
128 with multiple extents in the cache, and handles splitting an
129 extent in the cache if the extent to be removed is in the middle
130 of a cached extent.
131
132 add_free_extent_cache
133 Add an extent to the free extent cache. It will merge the
134 input extent with extents already in the cache.
135 CheckUnmappedBytes
136 Check whether or not the current transaction
137 has allocated blocks that were recently freed. This may have data safety implications.
138
139
140
141Debug/Test Routines
142 hfs_isallocated
143 Test to see if any blocks in a range are allocated. Journal or
144 allocation file lock must be held.
145
146 hfs_isallocated_scan
147 Test to see if any blocks in a range are allocated. Releases and
148 invalidates the block used when finished.
149
150Optimization Routines
151 hfs_alloc_scan_block
152 Given a starting allocation block number, figures out which physical block contains that
153 allocation block's bit, and scans it from the starting bit until either the ending bit or
154 the end of the block. Free space extents are inserted into the appropriate red-black tree.
155
156*/
157
158#include <sys/types.h>
159#include <sys/buf.h>
160
161
162#if !HFS_ALLOC_TEST
163
164#include "../../hfs_macos_defs.h"
1c79356b 165#include <sys/systm.h>
6d2010ae 166#include <sys/ubc.h>
060df5ea 167#include <kern/kalloc.h>
39236c6e 168
169/* For VM Page size */
170#include <libkern/libkern.h>
1c79356b 171#include "../../hfs.h"
1c79356b 172#include "../../hfs_endian.h"
1c79356b 173#include "../headers/FileMgrInternal.h"
174#include <vfs/vfs_journal.h>
175
176#endif // !HFS_ALLOC_TEST
177
178#include <sys/sysctl.h>
179#include <sys/disk.h>
180#include <sys/uio.h>
181#include <sys/malloc.h>
182
183#include "../../hfs_dbg.h"
184#include "../../hfs_format.h"
6d2010ae 185#include "../../hfs_kdebug.h"
1c79356b 186
316670eb 187/* Headers for unmap-on-mount support */
188#include <sys/disk.h>
189
190#ifndef CONFIG_HFS_TRIM
191#define CONFIG_HFS_TRIM 0
192#endif
193
194/*
195 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
196 * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
197 * disabled by default because there can be a lot of these events,
198 * and we don't want to overwhelm the kernel debug buffer. If you
199 * want to watch these events in particular, just set the sysctl.)
200 */
201static int hfs_kdebug_allocation = 0;
202SYSCTL_DECL(_vfs_generic);
203SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
204SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
205SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
206enum {
207 /*
208 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
209 * BlockDeallocate, including the internal BlockAllocateXxx
210 * routines so we can see how an allocation was satisfied.
211 *
212 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
213 * free extent cache.
214 *
215 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
216 *
217 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
218 * or clearing bits, scanning the bitmap).
219 */
220 HFSDBG_ALLOC_ENABLED = 1,
221 HFSDBG_EXT_CACHE_ENABLED = 2,
222 HFSDBG_UNMAP_ENABLED = 4,
223 HFSDBG_BITMAP_ENABLED = 8
224};
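/*
 * Example: the values above are bit flags and can be combined.  Setting
 * vfs.generic.hfs.kdebug.allocation to 5 (HFSDBG_ALLOC_ENABLED |
 * HFSDBG_UNMAP_ENABLED) logs allocation calls and trim-list activity while
 * leaving the noisier extent-cache and bitmap events disabled.
 */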
225
226enum {
0b4e3aa0 227 kBytesPerWord = 4,
228 kBitsPerByte = 8,
229 kBitsPerWord = 32,
230
231 kBitsWithinWordMask = kBitsPerWord-1
232};
233
234#define kLowBitInWordMask 0x00000001ul
235#define kHighBitInWordMask 0x80000000ul
236#define kAllBitsSetInWord 0xFFFFFFFFul
237
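/*
 * Illustrative sketch (not part of the original source): how an allocation
 * block number maps onto these constants.  The bitmap is stored with the
 * most significant bit of each 32-bit word first, which is why a block's
 * mask is derived from kHighBitInWordMask rather than kLowBitInWordMask.
 */
#if 0 /* example only */
static void
example_locate_bit(u_int32_t allocBlock, u_int32_t *wordIndex, u_int32_t *bitMask)
{
	*wordIndex = allocBlock / kBitsPerWord;                                /* which bitmap word */
	*bitMask   = kHighBitInWordMask >> (allocBlock & kBitsWithinWordMask); /* which bit within it */
}
#endif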
39236c6e 238#define HFS_MIN_SUMMARY_BLOCKSIZE 4096
1c79356b 239
240#define ALLOC_DEBUG 0
241
1c79356b 242static OSErr ReadBitmapBlock(
243 ExtendedVCB *vcb,
244 u_int32_t bit,
245 u_int32_t **buffer,
246 uintptr_t *blockRef);
247
248static OSErr ReleaseBitmapBlock(
249 ExtendedVCB *vcb,
250 uintptr_t blockRef,
251 Boolean dirty);
252
253static OSErr BlockAllocateAny(
254 ExtendedVCB *vcb,
255 u_int32_t startingBlock,
256 u_int32_t endingBlock,
257 u_int32_t maxBlocks,
258 u_int32_t flags,
259 Boolean trustSummary,
260 u_int32_t *actualStartBlock,
261 u_int32_t *actualNumBlocks);
1c79356b 262
6d2010ae 263static OSErr BlockAllocateAnyBitmap(
264 ExtendedVCB *vcb,
265 u_int32_t startingBlock,
266 u_int32_t endingBlock,
267 u_int32_t maxBlocks,
268 u_int32_t flags,
269 u_int32_t *actualStartBlock,
270 u_int32_t *actualNumBlocks);
6d2010ae 271
1c79356b 272static OSErr BlockAllocateContig(
273 ExtendedVCB *vcb,
274 u_int32_t startingBlock,
275 u_int32_t minBlocks,
276 u_int32_t maxBlocks,
277 u_int32_t flags,
278 u_int32_t *actualStartBlock,
279 u_int32_t *actualNumBlocks);
6d2010ae 280
1c79356b 281static OSErr BlockFindContiguous(
282 ExtendedVCB *vcb,
283 u_int32_t startingBlock,
284 u_int32_t endingBlock,
285 u_int32_t minBlocks,
286 u_int32_t maxBlocks,
287 Boolean useMetaZone,
288 Boolean trustSummary,
289 u_int32_t *actualStartBlock,
290 u_int32_t *actualNumBlocks);
1c79356b 291
0b4e3aa0 292static OSErr BlockAllocateKnown(
293 ExtendedVCB *vcb,
294 u_int32_t maxBlocks,
295 u_int32_t *actualStartBlock,
296 u_int32_t *actualNumBlocks);
0b4e3aa0 297
6d2010ae 298static OSErr BlockMarkAllocatedInternal (
299 ExtendedVCB *vcb,
300 u_int32_t startingBlock,
301 register u_int32_t numBlocks);
302
303static OSErr BlockMarkFreeInternal(
304 ExtendedVCB *vcb,
305 u_int32_t startingBlock,
306 u_int32_t numBlocks,
307 Boolean do_validate);
6d2010ae 308
6d2010ae 309
310static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize,
311 uint32_t **buffer, struct buf **blockRef);
312
313static OSErr ReleaseScanBitmapRange( struct buf *bp );
314
315static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset,
39236c6e 316 u_int32_t numBlocks, struct jnl_trim_list *list);
317
318static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list);
319
320static int hfs_alloc_scan_range(struct hfsmount *hfsmp,
321 u_int32_t startbit,
322 u_int32_t *bitToScan,
323 struct jnl_trim_list *list);
316670eb 324
325static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize);
326static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next);
6d2010ae 327
328/* Bitmap Re-use Detection */
329static inline int extents_overlap (uint32_t start1, uint32_t len1,
330 uint32_t start2, uint32_t len2) {
331 return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) );
332}
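/*
 * For example, extents_overlap(10, 5, 15, 3) is false: the first extent
 * covers blocks 10..14 and ends exactly where the second begins, while
 * extents_overlap(10, 5, 14, 3) is true, since block 14 lies in both.
 */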
6d2010ae 333
6d2010ae 334
335int hfs_isallocated_scan (struct hfsmount *hfsmp,
336 u_int32_t startingBlock,
337 u_int32_t *bp_buf);
338
339/* Summary Table Functions */
340static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse);
341static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index);
342static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
343static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc);
344static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length);
345static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks);
346static int hfs_rebuild_summary (struct hfsmount *hfsmp);
347
348#if 0
349static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock);
350#endif
6d2010ae 351
352/* Used in external mount code to initialize the summary table */
353int hfs_init_summary (struct hfsmount *hfsmp);
6d2010ae 354
355#if ALLOC_DEBUG
356void hfs_validate_summary (struct hfsmount *hfsmp);
357#endif
6d2010ae 358
359
360/* Functions for manipulating free extent cache */
361static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
362static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
363static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
364
 365/* Functions for getting free extents */
366
367typedef struct bitmap_context {
368 void *bitmap; // current bitmap chunk
369 uint32_t run_offset; // offset (in bits) from start of bitmap to start of current run
370 uint32_t chunk_current; // next bit to scan in the chunk
371 uint32_t chunk_end; // number of valid bits in this chunk
372 struct hfsmount *hfsmp;
373 struct buf *bp;
374 uint32_t last_free_summary_bit; // last marked free summary bit
375 int lockflags;
376 uint64_t lock_start;
377} bitmap_context_t;
378
379
380static errno_t get_more_bits(bitmap_context_t *bitmap_ctx);
381static int bit_count_set(void *bitmap, int start, int end);
382static int bit_count_clr(void *bitmap, int start, int end);
383static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count);
384static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count);
385static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count);
386static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set);
387static int clzll(uint64_t x);
388
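/*
 * A minimal sketch of the contract clzll() is expected to satisfy (its
 * definition appears later in the file): count leading zero bits in a
 * 64-bit value, returning 64 for an input of zero since __builtin_clzll(0)
 * is undefined.  The body below is an assumption for illustration, not
 * necessarily the implementation used.
 */
#if 0 /* example only */
static int
example_clzll(uint64_t x)
{
	return (x == 0) ? 64 : __builtin_clzll(x);
}
#endif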
6d2010ae 389#if ALLOC_DEBUG
390/*
391 * Validation Routine to verify that the TRIM list maintained by the journal
392 * is in good shape relative to what we think the bitmap should have. We should
393 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
394 * we panic.
395 */
316670eb 396int trim_validate_bitmap (struct hfsmount *hfsmp);
397int trim_validate_bitmap (struct hfsmount *hfsmp) {
398 u_int64_t blockno_offset;
399 u_int64_t numblocks;
400 int i;
401 int count;
402 u_int32_t startblk;
403 u_int32_t blks;
404 int err = 0;
405 uint32_t alloccount = 0;
406
407 if (hfsmp->jnl) {
408 struct journal *jnl = (struct journal*)hfsmp->jnl;
409 if (jnl->active_tr) {
410 struct jnl_trim_list *trim = &(jnl->active_tr->trim);
411 count = trim->extent_count;
412 for (i = 0; i < count; i++) {
413 blockno_offset = trim->extents[i].offset;
414 blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
415 blockno_offset = blockno_offset / hfsmp->blockSize;
416 numblocks = trim->extents[i].length / hfsmp->blockSize;
417
418 startblk = (u_int32_t)blockno_offset;
419 blks = (u_int32_t) numblocks;
420 err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
421
422 if (err == 0 && alloccount != 0) {
423 panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
424 }
425 }
426 }
427 }
428 return 0;
429}
1c79356b 430
6d2010ae 431#endif
060df5ea 432
316670eb 433
060df5ea 434/*
435 ;________________________________________________________________________________
436 ;
437 ; Routine: hfs_unmap_free_extent
438 ;
439 ; Function: Make note of a range of allocation blocks that should be
440 ; unmapped (trimmed). That is, the given range of blocks no
441 ; longer have useful content, and the device can unmap the
442 ; previous contents. For example, a solid state disk may reuse
443 ; the underlying storage for other blocks.
444 ;
445 ; This routine is only supported for journaled volumes. The extent
446 ; being freed is passed to the journal code, and the extent will
447 ; be unmapped after the current transaction is written to disk.
448 ;
449 ; Input Arguments:
450 ; hfsmp - The volume containing the allocation blocks.
451 ; startingBlock - The first allocation block of the extent being freed.
452 ; numBlocks - The number of allocation blocks of the extent being freed.
453 ;________________________________________________________________________________
454 */
455static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
456{
457 u_int64_t offset;
458 u_int64_t length;
459 u_int64_t device_sz;
460 int err = 0;
39236c6e 461
462 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
463 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
39236c6e 464
465 if (ALLOC_DEBUG) {
466 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
467 panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks);
468 }
469 }
39236c6e 470
6d2010ae 471 if (hfsmp->jnl != NULL) {
316670eb 472 device_sz = hfsmp->hfs_logical_bytes;
473 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
474 length = (u_int64_t) numBlocks * hfsmp->blockSize;
475
476 /* Validate that the trim is in a valid range of bytes */
477 if ((offset >= device_sz) || ((offset + length) > device_sz)) {
39236c6e 478 printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length);
479 err = EINVAL;
480 }
481
482 if (err == 0) {
483 err = journal_trim_add_extent(hfsmp->jnl, offset, length);
484 if (err) {
39236c6e 485 printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN);
316670eb 486 }
487 }
488 }
39236c6e 489
490 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
491 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
492}
493
060df5ea 494/*
495 ;________________________________________________________________________________
496 ;
497 ; Routine: hfs_track_unmap_blocks
498 ;
499 ; Function: Make note of a range of allocation blocks that should be
500 ; unmapped (trimmed). That is, the given range of blocks no
501 ; longer have useful content, and the device can unmap the
502 ; previous contents. For example, a solid state disk may reuse
503 ; the underlying storage for other blocks.
504 ;
505 ; This routine is only supported for journaled volumes.
506 ;
507 ; *****NOTE*****:
508 ; This function should *NOT* be used when the volume is fully
509 ; mounted. This function is intended to support a bitmap iteration
510 ; at mount time to fully inform the SSD driver of the state of all blocks
511 ; at mount time, and assumes that there is no allocation/deallocation
 512 ;    interference during its iteration.
513 ;
514 ; Input Arguments:
515 ; hfsmp - The volume containing the allocation blocks.
516 ; offset - The first allocation block of the extent being freed.
517 ; numBlocks - The number of allocation blocks of the extent being freed.
518 ; list - The list of currently tracked trim ranges.
519 ;________________________________________________________________________________
520 */
521static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start,
522 u_int32_t numBlocks, struct jnl_trim_list *list) {
523
524 u_int64_t offset;
525 u_int64_t length;
526 int error = 0;
527
528 if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
529 int extent_no = list->extent_count;
530 offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
531 length = (u_int64_t) numBlocks * hfsmp->blockSize;
532
533
534 list->extents[extent_no].offset = offset;
535 list->extents[extent_no].length = length;
536 list->extent_count++;
537 if (list->extent_count == list->allocated_count) {
538 error = hfs_issue_unmap (hfsmp, list);
539 }
540 }
541
542 return error;
543}
544
545/*
546 ;________________________________________________________________________________
547 ;
548 ; Routine: hfs_issue_unmap
549 ;
550 ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list
551 ;
552 ; Input Arguments:
553 ; hfsmp - The volume containing the allocation blocks.
554 ; list - The list of currently tracked trim ranges.
555 ;________________________________________________________________________________
556 */
557
558static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list)
559{
560 dk_unmap_t unmap;
561 int error = 0;
316670eb 562
563 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
564 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
565 }
566
567 if (list->extent_count > 0) {
568 bzero(&unmap, sizeof(unmap));
569 unmap.extents = list->extents;
570 unmap.extentsCount = list->extent_count;
316670eb 571
572 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
573 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0);
574 }
575
576#if CONFIG_PROTECT
577 /*
578 * If we have not yet completed the first scan through the bitmap, then
579 * optionally inform the block driver below us that this is an initialization
580 * TRIM scan, if it can deal with this information.
581 */
582 if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) {
583 unmap.options |= _DK_UNMAP_INITIALIZE;
584 }
585#endif
586 /* Issue a TRIM and flush them out */
587 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
588
589 bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t)));
fe8ab488 590 bzero (&unmap, sizeof(unmap));
591 list->extent_count = 0;
592 }
593
594 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
595 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
596 }
597
598 return error;
599}
600
601/*
602 ;________________________________________________________________________________
603 ;
604 ; Routine: hfs_unmap_alloc_extent
605 ;
606 ; Function: Make note of a range of allocation blocks, some of
607 ; which may have previously been passed to hfs_unmap_free_extent,
608 ; is now in use on the volume. The given blocks will be removed
609 ; from any pending DKIOCUNMAP.
610 ;
611 ; Input Arguments:
612 ; hfsmp - The volume containing the allocation blocks.
613 ; startingBlock - The first allocation block of the extent being allocated.
614 ; numBlocks - The number of allocation blocks being allocated.
615 ;________________________________________________________________________________
616 */
39236c6e 617
618static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
619{
620 u_int64_t offset;
621 u_int64_t length;
622 int err;
39236c6e 623
624 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
625 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
39236c6e 626
627 if (hfsmp->jnl != NULL) {
628 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
629 length = (u_int64_t) numBlocks * hfsmp->blockSize;
39236c6e 630
631 err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
632 if (err) {
39236c6e 633 printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN);
634 }
635 }
39236c6e 636
637 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
638 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
639}
640
641
642/*
643;________________________________________________________________________________
644;
6d2010ae 645; Routine: hfs_trim_callback
1c79356b 646;
647; Function: This function is called when a transaction that freed extents
648; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
649; written to the on-disk journal. This routine will add those
650; extents to the free extent cache so that they can be reused.
1c79356b 651;
652; CAUTION: This routine is called while the journal's trim lock
653; is held shared, so that no other thread can reuse any portion
654; of those extents. We must be very careful about which locks
655; we take from within this callback, to avoid deadlock. The
656; call to add_free_extent_cache will end up taking the cache's
657; lock (just long enough to add these extents to the cache).
1c79356b 658;
659; CAUTION: If the journal becomes invalid (eg., due to an I/O
660; error when trying to write to the journal), this callback
661; will stop getting called, even if extents got freed before
662; the journal became invalid!
1c79356b 663;
664; Input Arguments:
665; arg - The hfsmount of the volume containing the extents.
666; extent_count - The number of extents freed in the transaction.
667; extents - An array of extents (byte ranges) that were freed.
668;________________________________________________________________________________
669*/
39236c6e 670
671__private_extern__ void
672hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
b0d623f7 673{
674 uint32_t i;
675 uint32_t startBlock, numBlocks;
676 struct hfsmount *hfsmp = arg;
39236c6e 677
678 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
679 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
39236c6e 680
681 for (i=0; i<extent_count; ++i) {
682 /* Convert the byte range in *extents back to a range of allocation blocks. */
683 startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
684 numBlocks = extents[i].length / hfsmp->blockSize;
685 (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
b0d623f7 686 }
687
688 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
689 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
690}
691
1c79356b 692
693/*
694 ;________________________________________________________________________________
695 ;
696 ; Routine: CheckUnmappedBytes
697 ;
698 ; Function: From the specified inputs, determine if the extent in question overlaps
699 ; space that was recently freed, where the recently freed space may still be
700 ; lingering in an uncommitted journal transaction. This may have data safety
701 ; implications. The intended use is to decide whether or not to force a journal flush
702 ; before allowing file data I/O to be issued. If we did not do this
703 ; then it would be possible to issue the file I/O ahead of the
704 ; journal, resulting in data being overwritten if the transaction either
705 ; is not committed or cannot be replayed.
706 ;
707 ; NOTE: This function assumes that the journal and catalog/extent locks are held.
708 ;
709 ; Input Arguments:
710 ; hfsmp - The volume containing the allocation blocks.
 711 ;  blockno        - start of the extent in question (in allocation blocks)
 712 ;  numblocks      - number of blocks in the extent.
 713 ;  recently_freed - output pointer indicating whether or not the blocks were freed recently
714 ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks)
715 ;
716 ; Output:
717 ;
718 ; Returns 0 if we could determine extent validity for this (or a previous transaction)
719 ; Returns errno if there was an error
720 ;
721 ; If returned 0, then recently freed will contain a boolean that indicates
722 ; that it was recently freed.
723 ;________________________________________________________________________________
724 */
725
726u_int32_t
727CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
728 uint64_t device_offset;
729 uint64_t numbytes;
730 uint32_t err = 0;
731 uint64_t lba_overlap_end;
732
733 if (hfsmp->jnl != NULL) {
734 /*
735 * Convert the allocation block # and the number of blocks into device-relative
736 * offsets so that they can be compared using the TRIM list.
737 */
738 uint64_t device_sz = hfsmp->hfs_logical_bytes;
739 device_offset = blockno * ((uint64_t)hfsmp->blockSize);
740 device_offset += hfsmp->hfsPlusIOPosOffset;
741 numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
742
743 /*
744 * Since we check that the device_offset isn't too large, it's safe to subtract it
745 * from the size in the second check.
746 */
747 if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) {
748 return EINVAL;
749 }
750
751 /* Ask the journal if this extent overlaps with any pending TRIMs */
752 if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) {
753 *recently_freed = 1;
754
755 /* Convert lba_overlap_end back into allocation blocks */
756 uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset;
757 end_offset = end_offset / ((uint64_t) hfsmp->blockSize);
758 *overlap_end = (uint32_t) end_offset;
759 }
760 else {
761 *recently_freed = 0;
762 }
763 err = 0;
764 }
765 else {
766 /* There may not be a journal. In that case, always return success. */
767 *recently_freed = 0;
768 }
769 return err;
770
771}
772
773
774/*
775 ;________________________________________________________________________________
776 ;
39236c6e 777 ; Routine: ScanUnmapBlocks
316670eb 778 ;
39236c6e 779 ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying
780 ; device as needed so that the underlying disk device is as
781 ; up-to-date as possible with which blocks are unmapped.
39236c6e 782 ; Additionally build up the summary table as needed.
316670eb 783 ;
784 ; This function reads the bitmap in large block size
 785 ;    (up to 1MB), unlike the runtime, which reads the bitmap
786 ; in 4K block size. So if this function is being called
787 ; after the volume is mounted and actively modified, the
788 ; caller needs to invalidate all of the existing buffers
789 ; associated with the bitmap vnode before calling this
790 ; function. If the buffers are not invalidated, it can
 791 ;    cause buf_t collisions and potential data corruption.
792 ;
793 ; Input Arguments:
794 ; hfsmp - The volume containing the allocation blocks.
795 ;________________________________________________________________________________
796 */
797
798__private_extern__
799u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp)
800{
801 u_int32_t blocks_scanned = 0;
802 int error = 0;
803 struct jnl_trim_list trimlist;
804
805 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
806 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0);
807 }
808
809 /*
810 *struct jnl_trim_list {
811 uint32_t allocated_count;
812 uint32_t extent_count;
813 dk_extent_t *extents;
814 };
815 */
816
817 /*
818 * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM
819 * which is now enabled for most architectures. Instead, any trim related
820 * work should be tied to whether the underlying storage media supports
821 * UNMAP, as any solid state device would on desktop or embedded.
822 *
823 * We do this because we may want to scan the full bitmap on desktop
824 * for spinning media for the purposes of building up the
825 * summary table.
826 *
827 * We also avoid sending TRIMs down to the underlying media if the mount is read-only.
828 */
829
830 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
831 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
832 /* If the underlying device supports unmap and the mount is read-write, initialize */
833 int alloc_count = PAGE_SIZE / sizeof(dk_extent_t);
834 void *extents = kalloc (alloc_count * sizeof(dk_extent_t));
835 if (extents == NULL) {
836 return ENOMEM;
837 }
838 bzero (&trimlist, sizeof(trimlist));
839 trimlist.extents = (dk_extent_t*)extents;
840 trimlist.allocated_count = alloc_count;
841 trimlist.extent_count = 0;
842 }
843
844 while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){
845
846 error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist);
847
848 if (error) {
849 printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN);
850 break;
851 }
852 }
853
854 if ((hfsmp->hfs_flags & HFS_UNMAP) &&
855 ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
856 if (error == 0) {
857 hfs_issue_unmap(hfsmp, &trimlist);
858 }
859 if (trimlist.extents) {
860 kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t)));
861 }
862 }
863
864 /*
865 * This is in an #if block because hfs_validate_summary prototype and function body
866 * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly.
867 */
868#if ALLOC_DEBUG
869 sanity_check_free_ext(hfsmp, 1);
870 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
871 /* Validate the summary table too! */
872 hfs_validate_summary(hfsmp);
873 printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN);
316670eb 874 }
875#endif
876
877 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) {
878 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0);
879 }
880
881 return error;
882}
883
884/*
885 ;________________________________________________________________________________
886 ;
887 ; Routine: BlockAllocate
888 ;
889 ; Function: Allocate space on a volume. If contiguous allocation is requested,
890 ; at least the requested number of bytes will be allocated or an
 891 ;            at least the requested number of blocks will be allocated or an
892 ; the space will be allocated with the first largest extent available
893 ; at the requested starting allocation block. If there is not enough
894 ; room there, a block allocation of less than the requested size will be
895 ; allocated.
896 ;
897 ; If the requested starting block is 0 (for new file allocations),
898 ; the volume's allocation block pointer will be used as a starting
899 ; point.
900 ;
901 ; Input Arguments:
902 ; vcb - Pointer to ExtendedVCB for the volume to allocate space on
903 ; fcb - Pointer to FCB for the file for which storage is being allocated
904 ; startingBlock - Preferred starting allocation block, 0 = no preference
905 ; minBlocks - Number of blocks requested. If the allocation is non-contiguous,
906 ; less than this may actually be allocated
907 ; maxBlocks - The maximum number of blocks to allocate. If there is additional free
 908 ;                  space after minBlocks, then up to maxBlocks blocks should really
909 ; be allocated. (Used by ExtendFileC to round up allocations to a multiple
910 ; of the file's clump size.)
911 ; flags - Flags to specify options like contiguous, use metadata zone,
912 ; skip free block check, etc.
913 ;
914 ; Output:
915 ; (result) - Error code, zero for successful allocation
916 ; *startBlock - Actual starting allocation block
39236c6e 917 ;    *actualBlocks  - Actual number of allocation blocks allocated
918 ;
919 ; Side effects:
920 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
921 ;________________________________________________________________________________
922 */
1c79356b 923OSErr BlockAllocate (
924 ExtendedVCB *vcb, /* which volume to allocate space on */
925 u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
926 u_int32_t minBlocks, /* desired number of blocks to allocate */
927 u_int32_t maxBlocks, /* maximum number of blocks to allocate */
928 u_int32_t flags, /* option flags */
929 u_int32_t *actualStartBlock, /* actual first block of allocation */
930 u_int32_t *actualNumBlocks)
931/*
932 * actualNumBlocks is the number of blocks actually allocated;
933 * if forceContiguous was zero, then this may represent fewer than minBlocks
934 */
1c79356b 935{
2d21ac55 936 u_int32_t freeBlocks;
1c79356b 937 OSErr err;
1c79356b 938 Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
6d2010ae 939 struct hfsmount *hfsmp;
940 Boolean useMetaZone;
941 Boolean forceContiguous;
39236c6e 942 Boolean forceFlush;
0b4c1975 943
944 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
945 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
39236c6e 946
947 if (flags & HFS_ALLOC_FORCECONTIG) {
948 forceContiguous = true;
949 } else {
950 forceContiguous = false;
951 }
952
953 if (flags & HFS_ALLOC_METAZONE) {
954 useMetaZone = true;
955 } else {
956 useMetaZone = false;
957 }
1c79356b 958
959 if (flags & HFS_ALLOC_FLUSHTXN) {
960 forceFlush = true;
961 }
962 else {
963 forceFlush = false;
964 }
965
966
967 //
968 // Initialize outputs in case we get an error
969 //
970 *actualStartBlock = 0;
971 *actualNumBlocks = 0;
972 hfsmp = VCBTOHFS (vcb);
973 freeBlocks = hfs_freeblks(hfsmp, 0);
974
975
976 /* Skip free block check if blocks are being allocated for relocating
977 * data during truncating a volume.
978 *
979 * During hfs_truncatefs(), the volume free block count is updated
980 * before relocating data to reflect the total number of free blocks
981 * that will exist on the volume after resize is successful. This
982 * means that we have reserved allocation blocks required for relocating
983 * the data and hence there is no need to check the free blocks.
984 * It will also prevent resize failure when the number of blocks in
985 * an extent being relocated is more than the free blocks that will
986 * exist after the volume is resized.
55e303ae 987 */
988 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
989 // If the disk is already full, don't bother.
990 if (freeBlocks == 0) {
991 err = dskFulErr;
992 goto Exit;
993 }
994 if (forceContiguous && freeBlocks < minBlocks) {
995 err = dskFulErr;
996 goto Exit;
997 }
998
999 /*
1000 * Clip if necessary so we don't over-subscribe the free blocks.
1001 */
1002 if (minBlocks > freeBlocks) {
1003 minBlocks = freeBlocks;
1004 }
1005 if (maxBlocks > freeBlocks) {
1006 maxBlocks = freeBlocks;
1007 }
1008 }
1009
1010 //
1011 // If caller didn't specify a starting block number, then use the volume's
1012 // next block to allocate from.
1013 //
1014 if (startingBlock == 0) {
1015 hfs_lock_mount (hfsmp);
1016
6d2010ae 1017 /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
1018 if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1019 startingBlock = vcb->sparseAllocation;
1020 }
1021 else {
1022 startingBlock = vcb->nextAllocation;
1023 }
39236c6e 1024 hfs_unlock_mount(hfsmp);
1025 updateAllocPtr = true;
1026 }
1027
1028
2d21ac55 1029 if (startingBlock >= vcb->allocLimit) {
1030 startingBlock = 0; /* overflow so start at beginning */
1031 }
0b4e3aa0 1032
1033 //
1034 // If the request must be contiguous, then find a sequence of free blocks
1035 // that is long enough. Otherwise, find the first free block.
1036 //
1037 if (forceContiguous) {
55e303ae 1038 err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
39236c6e 1039 flags, actualStartBlock, actualNumBlocks);
9bccf70c 1040 /*
1041 * If we allocated from a new position then also update the roving allocator.
1042 * This will keep the roving allocation pointer up-to-date even
1043 * if we are using the new R/B tree allocator, since
1044 * it doesn't matter to us here, how the underlying allocator found
1045 * the block to vend out.
9bccf70c 1046 */
55e303ae 1047 if ((err == noErr) &&
1048 (*actualStartBlock > startingBlock) &&
1049 ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
1050 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
b0d623f7 1051 updateAllocPtr = true;
55e303ae 1052 }
39236c6e 1053 } else {
1054 /*
1055 * Scan the bitmap once, gather the N largest free extents, then
1056 * allocate from these largest extents. Repeat as needed until
1057 * we get all the space we needed. We could probably build up
1058 * that list when the higher level caller tried (and failed) a
1059 * contiguous allocation first.
1060 *
 1061		 * Note that the free-extent cache will cease to be updated if
1062 * we are using the red-black tree for allocations. If we jettison
1063 * the tree, then we will reset the free-extent cache and start over.
0b4e3aa0 1064 */
1065
1066 /* Disable HFS_ALLOC_FLUSHTXN if needed */
1067 if (forceFlush) {
1068 flags &= ~HFS_ALLOC_FLUSHTXN;
1069 }
1070
1071 /*
1072 * BlockAllocateKnown only examines the free extent cache; anything in there will
1073 * have been committed to stable storage already.
1074 */
0b4e3aa0 1075 err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
39236c6e 1076
1077 /* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */
1078
1079 if (err == dskFulErr) {
1080 /*
1081 * Now we have to do a bigger scan. Start at startingBlock and go up until the
1082 * allocation limit. We 'trust' the summary bitmap in this call, if it tells us
1083 * that it could not find any free space.
6d2010ae 1084 */
2d21ac55 1085 err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
1086 maxBlocks, flags, true,
1087 actualStartBlock, actualNumBlocks);
1088 }
1089 if (err == dskFulErr) {
1090 /*
1091 * Vary the behavior here if the summary table is on or off.
 1092			 * If it is on, then we don't trust it if we get into this case and
1093 * basically do a full scan for maximum coverage.
1094 * If it is off, then we trust the above and go up until the startingBlock.
6d2010ae 1095 */
1096 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1097 err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
1098 flags, false,
1099 actualStartBlock, actualNumBlocks);
1100 }
1101 else {
1102 err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
1103 flags, false,
1104 actualStartBlock, actualNumBlocks);
1105 }
1106
1107 /*
1108 * Last Resort: Find/use blocks that may require a journal flush.
1109 */
1110 if (err == dskFulErr && forceFlush) {
1111 flags |= HFS_ALLOC_FLUSHTXN;
1112 err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
1113 flags, false,
1114 actualStartBlock, actualNumBlocks);
1115 }
6d2010ae 1116 }
1117 }
1118
91447636 1119Exit:
1120 if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) {
1121 errno_t ec;
1122 _dk_cs_map_t cm;
1123 uint64_t mapped_blocks;
1124
1125 cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset;
1126 cm.cm_extent.length = (uint64_t)*actualNumBlocks * hfsmp->blockSize;
1127 cm.cm_bytes_mapped = 0;
1128 ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current());
1129 if (ec != 0 && ec != ENOSPC) {
1130 printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec);
1131 err = ec;
1132 goto Exit_CS;
1133 }
1134 mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize;
1135 /* CoreStorage returned more blocks than requested */
1136 if (mapped_blocks > *actualNumBlocks) {
1137 printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n",
1138 mapped_blocks, *actualNumBlocks);
1139 }
1140 if (*actualNumBlocks > mapped_blocks) {
1141 if (forceContiguous && mapped_blocks < minBlocks) {
1142 mapped_blocks = 0;
1143 }
1144 }
1145 uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks;
1146 uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks;
1147 if (numBlocksToFree > 0) {
1148 err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags);
1149 if (err != noErr) {
1150 printf ("BlockDeallocate failed (err=%d)\n", err);
1151 goto Exit_CS;
1152 }
1153 }
1154 *actualNumBlocks = mapped_blocks;
1155 if (*actualNumBlocks == 0 && err == noErr) {
1156 err = dskFulErr;
1157 }
1158 }
1159Exit_CS:
1160 // if we actually allocated something then go update the
1161 // various bits of state that we maintain regardless of
1162 // whether there was an error (i.e. partial allocations
1163 // still need to update things like the free block count).
1164 //
1165 if (*actualNumBlocks != 0) {
1166 //
1167 // If we used the volume's roving allocation pointer, then we need to update it.
1168 // Adding in the length of the current allocation might reduce the next allocate
1169 // call by avoiding a re-scan of the already allocated space. However, the clump
1170 // just allocated can quite conceivably end up being truncated or released when
1171 // the file is closed or its EOF changed. Leaving the allocation pointer at the
1172 // start of the last allocation will avoid unnecessary fragmentation in this case.
1173 //
39236c6e 1174 hfs_lock_mount (hfsmp);
1c79356b 1175
6d2010ae 1176 lck_spin_lock(&hfsmp->vcbFreeExtLock);
1177 if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
1178 vcb->sparseAllocation = *actualStartBlock;
1179 }
6d2010ae 1180 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
1181 if (*actualNumBlocks < vcb->hfs_freed_block_count) {
1182 vcb->hfs_freed_block_count -= *actualNumBlocks;
1183 } else {
1184 vcb->hfs_freed_block_count = 0;
1185 }
6d2010ae 1186
55e303ae 1187 if (updateAllocPtr &&
1188 ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
1189 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
2d21ac55 1190 HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
55e303ae 1191 }
b0d623f7 1192
6d2010ae 1193 (void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);
1194
1195 /*
1196 * Update the number of free blocks on the volume
1197 *
1198 * Skip updating the free blocks count if the block are
1199 * being allocated to relocate data as part of hfs_truncatefs()
1200 */
1201 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1202 vcb->freeBlocks -= *actualNumBlocks;
1203 }
1c79356b 1204 MarkVCBDirty(vcb);
39236c6e 1205 hfs_unlock_mount(hfsmp);
1206
1207 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1c79356b 1208 }
39236c6e 1209
1210 if (ALLOC_DEBUG) {
1211 if (err == noErr) {
1212 if (*actualStartBlock >= hfsmp->totalBlocks) {
1213 panic ("BlockAllocate: vending invalid blocks!");
1214 }
1215 if (*actualStartBlock >= hfsmp->allocLimit) {
1216 panic ("BlockAllocate: vending block past allocLimit!");
1217 }
39236c6e 1218
1219 if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {
1220 panic ("BlockAllocate: vending too many invalid blocks!");
1221 }
39236c6e 1222
1223 if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {
1224 panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
1225 }
1226 }
1227 }
39236c6e 1228
1229 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1230 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
1231
1232 return err;
1233}
1234
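/*
 * Illustrative sketch (not part of the original source): a typical caller
 * asks BlockAllocate for between minBlocks and maxBlocks and, if the space
 * ends up unused, returns it with BlockDeallocate.  The specific values and
 * flags here are assumptions for the example only.
 */
#if 0 /* example only */
static OSErr
example_alloc_then_release(ExtendedVCB *vcb)
{
	u_int32_t startBlock = 0;
	u_int32_t numBlocks = 0;
	OSErr err;

	/* Ask for 8..32 contiguous blocks, letting the allocator pick the location. */
	err = BlockAllocate(vcb, 0, 8, 32, HFS_ALLOC_FORCECONTIG,
			&startBlock, &numBlocks);
	if (err != noErr)
		return err;

	/* ... caller records the extent; if it is not needed after all ... */
	return BlockDeallocate(vcb, startBlock, numBlocks, 0);
}
#endif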
1235
1236/*
1237;________________________________________________________________________________
1238;
6d2010ae 1239; Routine: BlockDeallocate
1240;
1241; Function: Update the bitmap to deallocate a run of disk allocation blocks
1242;
1243; Input Arguments:
1244; vcb - Pointer to ExtendedVCB for the volume to free space on
1245; firstBlock - First allocation block to be freed
1246; numBlocks - Number of allocation blocks to free up (must be > 0!)
1247;
1248; Output:
1249; (result) - Result code
1250;
1251; Side effects:
1252; The volume bitmap is read and updated; the volume bitmap cache may be changed.
6d2010ae 1253; The Allocator's red-black trees may also be modified as a result.
1254;________________________________________________________________________________
1255*/
1256
1257OSErr BlockDeallocate (
1258 ExtendedVCB *vcb, // Which volume to deallocate space on
1259 u_int32_t firstBlock, // First block in range to deallocate
1260 u_int32_t numBlocks, // Number of contiguous blocks to deallocate
1261 u_int32_t flags)
1262{
1263 OSErr err;
1264 struct hfsmount *hfsmp;
1265 hfsmp = VCBTOHFS(vcb);
39236c6e 1266
1267 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1268 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
1269
1270 //
1271 // If no blocks to deallocate, then exit early
1272 //
1273 if (numBlocks == 0) {
1274 err = noErr;
1275 goto Exit;
1276 }
1277
1278
1279 if (ALLOC_DEBUG) {
1280 if (firstBlock >= hfsmp->totalBlocks) {
1281 panic ("BlockDeallocate: freeing invalid blocks!");
1282 }
A
1284 if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {
1285 panic ("BlockDeallocate: freeing too many invalid blocks!");
1286 }
1287 }
1288
1289 /*
1290 * If we're using the summary bitmap, then try to mark the bits
1291 * as potentially usable/free before actually deallocating them.
1292 * It is better to be slightly speculative here for correctness.
6d2010ae 1293 */
1c79356b 1294
1295 (void) hfs_release_summary (hfsmp, firstBlock, numBlocks);
1296
6d2010ae 1297 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1298
1299 if (err) {
1c79356b 1300 goto Exit;
39236c6e 1301 }
1302
1303 //
1304 // Update the volume's free block count, and mark the VCB as dirty.
1305 //
39236c6e 1306 hfs_lock_mount(hfsmp);
1307 /*
 1308	 * Do not update the free block count.  This flag is specified
1309 * when a volume is being truncated.
1310 */
1311 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1312 vcb->freeBlocks += numBlocks;
1313 }
1314
b0d623f7 1315 vcb->hfs_freed_block_count += numBlocks;
1316
1317 if (vcb->nextAllocation == (firstBlock + numBlocks)) {
2d21ac55 1318 HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
1319 }
1320
1321 if (hfsmp->jnl == NULL) {
1322 /*
1323 * In the journal case, we'll add the free extent once the journal
1324 * calls us back to tell us it wrote the transaction to disk.
1325 */
1326 (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
39236c6e 1327
1328 /*
 1329		 * In the journal case, we'll only update sparseAllocation once the
1330 * free extent cache becomes empty (when we remove the last entry
1331 * from the cache). Skipping it here means we're less likely to
1332 * find a recently freed extent via the bitmap before it gets added
1333 * to the free extent cache.
1334 */
1335 if (firstBlock < vcb->sparseAllocation) {
1336 vcb->sparseAllocation = firstBlock;
1337 }
1338 }
39236c6e 1339
1c79356b 1340 MarkVCBDirty(vcb);
39236c6e 1341 hfs_unlock_mount(hfsmp);
1342
1343 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1344Exit:
1345
1346 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1347 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
1348
1349 return err;
1350}
1351
1352
2d21ac55 1353u_int8_t freebitcount[16] = {
1354 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
1355 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
1356};
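/*
 * Example: freebitcount[] holds the number of zero (free) bits in each
 * 4-bit nibble, so a bitmap byte of 0xF0 contributes
 * freebitcount[0x0] + freebitcount[0xF] = 4 + 0 = 4 free allocation blocks,
 * which is exactly how MetaZoneFreeBlocks() below tallies it.
 */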
1357
2d21ac55 1358u_int32_t
55e303ae 1359MetaZoneFreeBlocks(ExtendedVCB *vcb)
1c79356b 1360{
1361 u_int32_t freeblocks;
1362 u_int32_t *currCache;
b0d623f7 1363 uintptr_t blockRef;
1364 u_int32_t bit;
1365 u_int32_t lastbit;
1366 int bytesleft;
1367 int bytesperblock;
1368 u_int8_t byte;
1369 u_int8_t *buffer;
91447636 1370
1371 blockRef = 0;
1372 bytesleft = freeblocks = 0;
91447636 1373 buffer = NULL;
1374 bit = VCBTOHFS(vcb)->hfs_metazone_start;
1375 if (bit == 1)
1376 bit = 0;
39236c6e 1377
1378 lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1379 bytesperblock = vcb->vcbVBMIOSize;
1380
1381 /*
1382 * Count all the bits from bit to lastbit.
1383 */
1384 while (bit < lastbit) {
1385 /*
1386 * Get next bitmap block.
1387 */
1388 if (bytesleft == 0) {
1389 if (blockRef) {
1390 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1391 blockRef = 0;
1392 }
1393 if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
1394 return (0);
1395 }
2d21ac55 1396 buffer = (u_int8_t *)currCache;
1397 bytesleft = bytesperblock;
1398 }
1399 byte = *buffer++;
1400 freeblocks += freebitcount[byte & 0x0F];
1401 freeblocks += freebitcount[(byte >> 4) & 0x0F];
1402 bit += kBitsPerByte;
1403 --bytesleft;
1404 }
1405 if (blockRef)
1406 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1407
1408 return (freeblocks);
1409}
1410
1411
1412/*
1413 * Obtain the next allocation block (bit) that's
1414 * outside the metadata allocation zone.
1415 */
2d21ac55 1416static u_int32_t NextBitmapBlock(
1417 ExtendedVCB *vcb,
1418 u_int32_t bit)
1419{
1420 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1421
1422 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1423 return (bit);
1424 /*
1425 * Skip over metadata allocation zone.
1426 */
1427 if ((bit >= hfsmp->hfs_metazone_start) &&
39236c6e 1428 (bit <= hfsmp->hfs_metazone_end)) {
1429 bit = hfsmp->hfs_metazone_end + 1;
1430 }
1431 return (bit);
1432}
1433
1434
1435/*
1436;_______________________________________________________________________
1437;
1438; Routine: ReadBitmapBlock
1439;
1440; Function: Read in a bitmap block corresponding to a given allocation
0b4e3aa0 1441; block (bit). Return a pointer to the bitmap block.
1442;
1443; Inputs:
1444; vcb -- Pointer to ExtendedVCB
0b4e3aa0 1445; bit -- Allocation block whose bitmap block is desired
1446;
1447; Outputs:
 1448;	buffer		-- Pointer to bitmap block corresponding to "block"
0b4e3aa0 1449; blockRef
1450;_______________________________________________________________________
1451*/
1452static OSErr ReadBitmapBlock(
1453 ExtendedVCB *vcb,
1454 u_int32_t bit,
1455 u_int32_t **buffer,
1456 uintptr_t *blockRef)
1457{
1458 OSErr err;
1459 struct buf *bp = NULL;
1460 struct vnode *vp = NULL;
91447636 1461 daddr64_t block;
2d21ac55 1462 u_int32_t blockSize;
1c79356b 1463
1464 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1465 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
1466
0b4e3aa0 1467 /*
91447636 1468 * volume bitmap blocks are protected by the allocation file lock
0b4e3aa0 1469 */
91447636 1470 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1c79356b 1471
2d21ac55 1472 blockSize = (u_int32_t)vcb->vcbVBMIOSize;
91447636 1473 block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
1c79356b 1474
39236c6e
A
1475 /* HFS+ / HFSX */
1476 if (vcb->vcbSigWord != kHFSSigWord) {
91447636 1477 vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
39236c6e
A
1478 }
1479#if CONFIG_HFS_STD
1480 else {
1481 /* HFS Standard */
0b4e3aa0
A
1482 vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */
1483 block += vcb->vcbVBMSt; /* map to physical block */
1c79356b 1484 }
39236c6e 1485#endif
1c79356b 1486
91447636 1487 err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
1c79356b 1488
0b4e3aa0
A
1489 if (bp) {
1490 if (err) {
91447636 1491 buf_brelse(bp);
2d21ac55 1492 *blockRef = 0;
0b4e3aa0
A
1493 *buffer = NULL;
1494 } else {
b0d623f7 1495 *blockRef = (uintptr_t)bp;
2d21ac55 1496 *buffer = (u_int32_t *)buf_dataptr(bp);
0b4e3aa0 1497 }
1c79356b
A
1498 }
1499
6d2010ae
A
1500 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1501 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
1502
1c79356b
A
1503 return err;
1504}
1505
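/*
 * Illustrative sketch (assumptions only, not part of the original driver) of
 * the index arithmetic ReadBitmapBlock() performs above.  One bitmap I/O
 * chunk of 'vbm_iosize' bytes covers vbm_iosize * 8 allocation blocks, so an
 * allocation block (bit) maps to a logical block within the bitmap file, a
 * byte within that chunk, and a bit within that byte (bit 0 of the volume is
 * the most significant bit of byte 0).
 */
#include <stdint.h>
#include <stdio.h>

struct bitmap_pos {
	uint32_t logical_block;		/* which vbm_iosize-sized chunk to read  */
	uint32_t byte_in_chunk;		/* byte offset within that chunk         */
	uint32_t bit_in_byte;		/* 0 = most significant bit              */
};

static struct bitmap_pos
bitmap_locate(uint32_t bit, uint32_t vbm_iosize)
{
	uint32_t bits_per_chunk = vbm_iosize * 8;
	struct bitmap_pos pos;

	pos.logical_block = bit / bits_per_chunk;
	pos.byte_in_chunk = (bit % bits_per_chunk) / 8;
	pos.bit_in_byte   = bit % 8;
	return pos;
}

int
main(void)
{
	/* e.g. allocation block 100000 with a hypothetical 4 KiB bitmap I/O size */
	struct bitmap_pos p = bitmap_locate(100000, 4096);

	printf("chunk %u, byte %u, bit %u\n",
	    p.logical_block, p.byte_in_chunk, p.bit_in_byte);
	return 0;
}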
1506
0b4e3aa0
A
1507/*
1508;_______________________________________________________________________
1509;
39236c6e 1510; Routine: ReadBitmapRange
0b4e3aa0 1511;
39236c6e
A
1512; Function: Read in a range of the bitmap starting at the given offset.
1513; Use the supplied size to determine the amount of I/O to generate
1514; against the bitmap file. Return a pointer to the bitmap block.
0b4e3aa0
A
1515;
1516; Inputs:
39236c6e
A
1517; hfsmp -- Pointer to hfs mount
1518; offset -- byte offset into the bitmap file
1519; size -- How much I/O to generate against the bitmap file.
1520;
1521; Outputs:
1522;	buffer		-- Pointer to bitmap block data corresponding to "block"
1523; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call.
0b4e3aa0
A
1524;_______________________________________________________________________
1525*/
39236c6e
A
1526static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
1527 uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
0b4e3aa0 1528{
6d2010ae 1529
39236c6e
A
1530 OSErr err;
1531 struct buf *bp = NULL;
1532 struct vnode *vp = NULL;
1533 daddr64_t block;
0b4e3aa0 1534
39236c6e
A
1535 /* This function isn't supported for HFS standard */
1536 if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
1537 return EINVAL;
0b4e3aa0
A
1538 }
1539
39236c6e
A
1540 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1541 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
1542 }
1543
1544 /*
1545 * volume bitmap blocks are protected by the allocation file lock
1546 */
1547	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
1548
1549 vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */
1550
1551 /*
1552 * The byte offset argument must be converted into bitmap-relative logical
1553 * block numbers before using it in buf_meta_bread.
1554 *
1555 * buf_meta_bread (and the things it calls) will eventually try to
1556 * reconstruct the byte offset into the file by multiplying the logical
1557 * block number passed in below by the vcbVBMIOSize field in the mount
1558 * point. So we prepare for that by converting the byte offset back into
1559 * logical blocks in terms of VBMIOSize units.
1560 *
1561 * The amount of I/O requested and the byte offset should be computed
1562 * based on the helper function in the frame that called us, so we can
1563 * get away with just doing a simple divide here.
1564 */
1565 block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
1566
1567 err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
1568
1569 if (bp) {
1570 if (err) {
1571 buf_brelse(bp);
1572 *blockRef = 0;
1573 *buffer = NULL;
1574 } else {
1575 *blockRef = bp;
1576 *buffer = (u_int32_t *)buf_dataptr(bp);
1577 }
1578 }
1579
1580 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1581 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
1582 }
1583
1584 return err;
1585}
6d2010ae 1586
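/*
 * Hypothetical sketch of the caller-side math ReadBitmapRange() expects.
 * The comment above notes that the offset and I/O size are computed by a
 * helper in the calling frame; that helper is not shown in this excerpt, so
 * the names and rounding policy below are assumptions for illustration only.
 * The point is that both values are multiples of the bitmap I/O size, which
 * is what lets ReadBitmapRange() turn the byte offset into a logical block
 * with a single divide.
 */
#include <stdint.h>

struct bitmap_io {
	uint32_t offset;		/* byte offset into the bitmap file (aligned) */
	uint32_t iosize;		/* bytes to read (aligned)                    */
};

static struct bitmap_io
bitmap_range_for_blocks(uint32_t start_block, uint32_t block_count,
    uint32_t vbm_iosize)
{
	uint32_t first_byte = start_block / 8;
	uint32_t last_byte  = (start_block + block_count - 1) / 8;
	struct bitmap_io io;

	/* Round the first byte down, and the last byte up, to an I/O chunk. */
	io.offset = (first_byte / vbm_iosize) * vbm_iosize;
	io.iosize = ((last_byte / vbm_iosize) + 1) * vbm_iosize - io.offset;
	return io;
}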
6d2010ae 1587
6d2010ae 1588/*
39236c6e
A
1589;_______________________________________________________________________
1590;
1591; Routine: ReleaseBitmapBlock
1592;
1593; Function:	Release a bitmap block.
1594;
1595; Inputs:
1596; vcb
1597; blockRef
1598; dirty
1599;_______________________________________________________________________
1600*/
1601static OSErr ReleaseBitmapBlock(
1602 ExtendedVCB *vcb,
1603 uintptr_t blockRef,
1604 Boolean dirty)
1605{
1606 struct buf *bp = (struct buf *)blockRef;
6d2010ae 1607
39236c6e
A
1608 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1609 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
1610
1611 if (blockRef == 0) {
1612 if (dirty)
1613 panic("hfs: ReleaseBitmapBlock: missing bp");
6d2010ae
A
1614 return (0);
1615 }
39236c6e
A
1616
1617 if (bp) {
1618 if (dirty) {
1619 // XXXdbg
1620 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1621
1622 if (hfsmp->jnl) {
1623 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
1624 } else {
1625 buf_bdwrite(bp);
1626 }
1627 } else {
1628 buf_brelse(bp);
1629 }
1630 }
1631
1632 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1633 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
1634
1635 return (0);
1636}
1637
1638/*
1639 * ReleaseScanBitmapRange
1640 *
1641 * This is used to release struct bufs that were created for use by
1642 * bitmap scanning code. Because they may be of sizes different than the
1643 * typical runtime manipulation code, we want to force them to be purged out
1644 * of the buffer cache ASAP, so we'll release them differently than in the
1645 * ReleaseBitmapBlock case.
1646 *
1647 * Additionally, because we know that we're only reading the blocks and that they
1648 * should have been clean prior to reading them, we will never
1649 * issue a write to them (thus dirtying them).
1650 */
1651
1652static OSErr ReleaseScanBitmapRange(struct buf *bp) {
1653
1654 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1655		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1656 }
1657
6d2010ae
A
1658 if (bp) {
1659 /* Mark the buffer invalid if it isn't locked, then release it */
1660 if ((buf_flags(bp) & B_LOCKED) == 0) {
1661 buf_markinvalid(bp);
1662 }
1663 buf_brelse(bp);
1664 }
39236c6e
A
1665
1666 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
1667 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
1668 }
1669
0b4e3aa0
A
1670 return (0);
1671}
1672
1c79356b
A
1673/*
1674_______________________________________________________________________
1675
1676Routine: BlockAllocateContig
1677
1678Function: Allocate a contiguous group of allocation blocks. The
1679 allocation is all-or-nothing. The caller guarantees that
1680 there are enough free blocks (though they may not be
1681 contiguous, in which case this call will fail).
1682
1683Inputs:
1684 vcb Pointer to volume where space is to be allocated
1685 startingBlock Preferred first block for allocation
1686 minBlocks Minimum number of contiguous blocks to allocate
1687 maxBlocks Maximum number of contiguous blocks to allocate
39236c6e 1688	flags			Allocation flags (e.g. HFS_ALLOC_METAZONE, HFS_ALLOC_FLUSHTXN)
1c79356b
A
1689
1690Outputs:
1691 actualStartBlock First block of range allocated, or 0 if error
1692 actualNumBlocks Number of blocks allocated, or 0 if error
1693_______________________________________________________________________
1694*/
1695static OSErr BlockAllocateContig(
39236c6e
A
1696 ExtendedVCB *vcb,
1697 u_int32_t startingBlock,
1698 u_int32_t minBlocks,
1699 u_int32_t maxBlocks,
1700 u_int32_t flags,
1701 u_int32_t *actualStartBlock,
1702 u_int32_t *actualNumBlocks)
1c79356b 1703{
39236c6e
A
1704 OSErr retval = noErr;
1705 uint32_t currentStart = startingBlock;
1c79356b 1706
39236c6e
A
1707 uint32_t foundStart = 0; // values to emit to caller
1708 uint32_t foundCount = 0;
6d2010ae 1709
39236c6e
A
1710 uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this
1711 uint32_t collision_count = 0;
6d2010ae 1712
39236c6e
A
1713 int err;
1714 int allowReuse = (flags & HFS_ALLOC_FLUSHTXN);
1715 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
6d2010ae 1716
39236c6e
A
1717 int recently_deleted = 0;
1718 struct hfsmount *hfsmp = VCBTOHFS(vcb);
6d2010ae 1719
6d2010ae 1720 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
39236c6e 1721 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
6d2010ae 1722
39236c6e 1723 while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) {
6d2010ae 1724
39236c6e
A
1725 /* Try and find something that works. */
1726 do {
1727 /*
1728 * NOTE: If the only contiguous free extent of at least minBlocks
1729 * crosses startingBlock (i.e. starts before, ends after), then we
1730 * won't find it. Earlier versions *did* find this case by letting
1731 * the second search look past startingBlock by minBlocks. But
1732 * with the free extent cache, this can lead to duplicate entries
1733 * in the cache, causing the same blocks to be allocated twice.
1734 */
1735 retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks,
1736 maxBlocks, useMetaZone, true, &foundStart, &foundCount);
6d2010ae 1737
39236c6e
A
1738 if (retval == dskFulErr && currentStart != 0) {
1739 /*
1740 * We constrain the endingBlock so we don't bother looking for ranges
1741 * that would overlap those found in the previous call, if the summary bitmap
1742			 * is not on for this volume. If it is, then we assume that it was not
1743			 * trustworthy and do a full scan.
1744 */
1745 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1746 retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks,
1747 maxBlocks, useMetaZone, false, &foundStart, &foundCount);
6d2010ae 1748 }
39236c6e
A
1749 else {
1750 retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks,
1751 maxBlocks, useMetaZone, false, &foundStart, &foundCount);
1752 }
1753 }
1754 } while (0);
1755
1756 if (retval != noErr) {
1757 goto bailout;
6d2010ae 1758 }
39236c6e
A
1759
1760 /* Do we overlap with the recently found collision extent? */
1761 if (collision_start) {
1762 if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) {
1763 /*
1764 * We've looped around, and the only thing we could use was the collision extent.
1765 * Since we are allowed to use it, go ahead and do so now.
1766 */
1767 if(allowReuse) {
1768 /*
1769 * then we couldn't find anything except values which might have been
1770 * recently deallocated. just return our cached value if we are allowed to.
1771 */
1772 foundStart = collision_start;
1773 foundCount = collision_count;
1774 goto bailout;
6d2010ae 1775 }
39236c6e
A
1776 else {
1777 /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
1778 retval = dskFulErr;
1779 goto bailout;
1780 }
6d2010ae
A
1781 }
1782 }
6d2010ae 1783
39236c6e
A
1784		/* OK, we know we must not have collided. See if this one is recently deleted. */
1785 if (hfsmp->jnl) {
1786 recently_deleted = 0;
1787 uint32_t nextStart;
1788 err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
1789 (uint64_t) foundCount, &recently_deleted, &nextStart);
1790 if (err == 0) {
1791 if(recently_deleted != 0) {
1792 /*
1793 * these blocks were recently deleted/deallocated. Cache the extent, but
1794					 * keep searching to see if we can find one that won't collide here.
1795 */
1796 if (collision_start == 0) {
1797 collision_start = foundStart;
1798 collision_count = foundCount;
1799 }
1800 recently_deleted = 0;
1801
1802 /*
1803 * advance currentStart to the point just past the overlap we just found. Note that
1804 * we will automatically loop around to start of the bitmap as needed.
1805 */
1806 currentStart = nextStart;
1807 /* Unset foundStart/Count to allow it to loop around again. */
1808 foundStart = 0;
1809 foundCount = 0;
6d2010ae 1810 }
6d2010ae 1811 }
39236c6e
A
1812 } // end jnl/deleted case
1813
1814 /*
1815 * If we found something good, we'd break out of the loop at the top; foundCount
1816 * and foundStart should be set.
6d2010ae 1817 */
6d2010ae 1818
39236c6e 1819 } // end while loop.
6d2010ae 1820
39236c6e
A
1821bailout:
1822 /* mark the blocks as in-use */
1823 if (retval == noErr) {
1824 *actualStartBlock = foundStart;
1825 *actualNumBlocks = foundCount;
1826 err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
1827
1828 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) {
1829 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0);
6d2010ae
A
1830 }
1831 }
6d2010ae 1832
39236c6e
A
1833 return retval;
1834
1835}
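/*
 * Illustrative, user-space sketch (assumptions only) of the retry policy
 * BlockAllocateContig() implements above: ask a finder for a contiguous run;
 * if that run is still on the pending-TRIM ("recently deleted") list, cache
 * it as a fallback and keep scanning past it; hand the fallback back only
 * when the caller allows reuse (the HFS_ALLOC_FLUSHTXN case).  find_contig()
 * and trim_pending() are hypothetical stand-ins for BlockFindContiguous()
 * and CheckUnmappedBytes().
 */
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

struct extent { uint32_t start, count; };

extern int  find_contig(uint32_t from, uint32_t min, struct extent *out);
extern bool trim_pending(struct extent ext, uint32_t *next_start);

static bool
overlaps(struct extent a, struct extent b)
{
	return a.start < b.start + b.count && b.start < a.start + a.count;
}

static int
alloc_contig(uint32_t start, uint32_t min, bool allow_reuse, struct extent *out)
{
	struct extent found, fallback = { 0, 0 };
	uint32_t cursor = start, next;

	for (;;) {
		if (find_contig(cursor, min, &found) != 0)
			return ENOSPC;		/* nothing of at least 'min' blocks anywhere */

		/* Coming back around to the cached collision extent means the
		 * bitmap holds nothing else that is large enough. */
		if (fallback.count != 0 && overlaps(found, fallback)) {
			if (!allow_reuse)
				return ENOSPC;	/* caller won't flush the journal */
			*out = fallback;	/* forced to reuse a TRIM-pending run */
			return 0;
		}

		if (!trim_pending(found, &next)) {
			*out = found;		/* clean extent: use it as-is */
			return 0;
		}
		if (fallback.count == 0)
			fallback = found;	/* remember the first collision */
		cursor = next;			/* and keep searching past it */
	}
}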
6d2010ae 1836
1c79356b 1837
1c79356b
A
1838/*
1839_______________________________________________________________________
1840
1841Routine: BlockAllocateAny
1842
1843Function: Allocate one or more allocation blocks. If there are fewer
1844 free blocks than requested, all free blocks will be
1845 allocated. The caller guarantees that there is at least
1846 one free block.
1847
1848Inputs:
1849 vcb Pointer to volume where space is to be allocated
1850 startingBlock Preferred first block for allocation
1851 endingBlock Last block to check + 1
1852 maxBlocks Maximum number of contiguous blocks to allocate
55e303ae 1853	flags			Allocation flags (e.g. HFS_ALLOC_METAZONE)
1c79356b
A
1854
1855Outputs:
1856 actualStartBlock First block of range allocated, or 0 if error
1857 actualNumBlocks Number of blocks allocated, or 0 if error
1858_______________________________________________________________________
1859*/
6d2010ae 1860
0b4e3aa0 1861static OSErr BlockAllocateAny(
39236c6e
A
1862 ExtendedVCB *vcb,
1863 u_int32_t startingBlock,
1864 register u_int32_t endingBlock,
1865 u_int32_t maxBlocks,
1866 u_int32_t flags,
1867 Boolean trustSummary,
1868 u_int32_t *actualStartBlock,
1869 u_int32_t *actualNumBlocks)
1c79356b 1870{
6d2010ae 1871
39236c6e
A
1872 /*
1873 * If it is enabled, scan through the summary table to find the first free block.
1874 *
1875 * If it reports that there are not any free blocks, we could have a false
1876 * positive, so in that case, use the input arguments as a pass through.
1877 */
1878 uint32_t start_blk = startingBlock;
1879 uint32_t end_blk = endingBlock;
1880 struct hfsmount *hfsmp;
1881 OSErr err;
6d2010ae 1882
39236c6e
A
1883 hfsmp = (struct hfsmount*)vcb;
1884 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
1885 uint32_t suggested_start;
1886
1887 /*
1888 * If the summary table is enabled, scan through it to find the first free
1889 * block. If there was an error, or we couldn't find anything free in the
1890 * summary table, then just leave the start_blk fields unmodified. We wouldn't
1891		 * have gotten to this point unless the mount point made it look like there was possibly
1892 * free space in the FS.
1893 */
1894 err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start);
1895 if (err == 0) {
1896 start_blk = suggested_start;
1897 }
1898 else {
1899 /* Differentiate between ENOSPC and a more esoteric error in the above call. */
1900 if ((err == ENOSPC) && (trustSummary)) {
1901 /*
1902				 * The 'trustSummary' argument is for doing a full scan if we really,
1903				 * really need the space and we think it's somewhere but can't find it in the
1904 * summary table. If it's true, then we trust the summary table and return
1905 * dskFulErr if we couldn't find it above.
1906 */
1907 return dskFulErr;
1908 }
1909 /*
1910 * If either trustSummary was false or we got a different errno, then we
1911 * want to fall through to the real bitmap single i/o code...
1912 */
1913 }
1914 }
1915
1916 err = BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks,
1917 flags, actualStartBlock, actualNumBlocks);
6d2010ae 1918
6d2010ae 1919 return err;
6d2010ae 1920}
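/*
 * Sketch (assumptions only) of the "summary table first" policy in
 * BlockAllocateAny() above: consult the summary table for a starting hint; if
 * it reports ENOSPC and the caller trusts it, fail fast with "disk full";
 * otherwise fall back to scanning the bitmap from the caller's starting
 * block.  summary_hint() and bitmap_scan() are hypothetical stand-ins for
 * hfs_find_summary_free() and BlockAllocateAnyBitmap().
 */
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

extern int summary_hint(uint32_t start, uint32_t *suggested);	/* 0 or ENOSPC */
extern int bitmap_scan(uint32_t start, uint32_t end, uint32_t max,
    uint32_t *out_start, uint32_t *out_count);

static int
alloc_any(uint32_t start, uint32_t end, uint32_t max, bool trust_summary,
    uint32_t *out_start, uint32_t *out_count)
{
	uint32_t hint;
	int err = summary_hint(start, &hint);

	if (err == 0)
		start = hint;		/* jump straight to a likely-free region */
	else if (err == ENOSPC && trust_summary)
		return ENOSPC;		/* trust the table: nothing left to find */

	/* Any other error, or an untrusted table, falls through to the scan. */
	return bitmap_scan(start, end, max, out_start, out_count);
}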
39236c6e 1921
6d2010ae
A
1922
1923/*
1924 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
1925 * where the free allocation blocks are. Inputs and outputs are the same as for
1926 * BlockAllocateAny and BlockAllocateAnyRBTree
1927 */
1928
1929static OSErr BlockAllocateAnyBitmap(
39236c6e
A
1930 ExtendedVCB *vcb,
1931 u_int32_t startingBlock,
1932 register u_int32_t endingBlock,
1933 u_int32_t maxBlocks,
1934 u_int32_t flags,
1935 u_int32_t *actualStartBlock,
1936 u_int32_t *actualNumBlocks)
6d2010ae 1937{
1c79356b 1938 OSErr err;
2d21ac55
A
1939 register u_int32_t block; // current block number
1940 register u_int32_t currentWord; // Pointer to current word within bitmap block
1941 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
1942 register u_int32_t wordsLeft; // Number of words left in this bitmap block
1943 u_int32_t *buffer = NULL;
1944 u_int32_t *currCache = NULL;
b0d623f7 1945 uintptr_t blockRef;
2d21ac55
A
1946 u_int32_t bitsPerBlock;
1947 u_int32_t wordsPerBlock;
0b4e3aa0 1948 Boolean dirty = false;
b4c24cb9 1949 struct hfsmount *hfsmp = VCBTOHFS(vcb);
39236c6e
A
1950 uint32_t summary_block_scan = 0;
1951 Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE);
1952 Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN);
1c79356b 1953
6d2010ae
A
1954 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1955 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
1956
39236c6e 1957restartSearchAny:
2d21ac55
A
1958 /*
1959 * When we're skipping the metadata zone and the start/end
1960 * range overlaps with the metadata zone then adjust the
1961 * start to be outside of the metadata zone. If the range
1962 * is entirely inside the metadata zone then we can deny the
1963 * request (dskFulErr).
1964 */
1965 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
1966 if (startingBlock <= vcb->hfs_metazone_end) {
1967 if (endingBlock > (vcb->hfs_metazone_end + 2))
1968 startingBlock = vcb->hfs_metazone_end + 1;
1969 else {
1970 err = dskFulErr;
1971 goto Exit;
1972 }
1973 }
1974 }
1975
1c79356b
A
1976 // Since this routine doesn't wrap around
1977 if (maxBlocks > (endingBlock - startingBlock)) {
1978 maxBlocks = endingBlock - startingBlock;
1979 }
1980
1c79356b
A
1981 //
1982 // Pre-read the first bitmap block
1983 //
55e303ae 1984 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
1c79356b 1985 if (err != noErr) goto Exit;
55e303ae 1986 buffer = currCache;
1c79356b
A
1987
1988 //
1989 // Set up the current position within the block
1990 //
1991 {
2d21ac55 1992 u_int32_t wordIndexInBlock;
39236c6e 1993
0b4e3aa0
A
1994 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
1995 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 1996
0b4e3aa0 1997 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1c79356b 1998 buffer += wordIndexInBlock;
0b4e3aa0 1999 wordsLeft = wordsPerBlock - wordIndexInBlock;
1c79356b
A
2000 currentWord = SWAP_BE32 (*buffer);
2001 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
2002 }
39236c6e
A
2003
2004 /*
2005 * While loop 1:
2006 * Find the first unallocated block starting at 'block'
2007 */
1c79356b
A
2008 block=startingBlock;
2009 while (block < endingBlock) {
2010 if ((currentWord & bitMask) == 0)
2011 break;
55e303ae 2012
1c79356b
A
2013 // Next bit
2014 ++block;
2015 bitMask >>= 1;
2016 if (bitMask == 0) {
2017 // Next word
2018 bitMask = kHighBitInWordMask;
2019 ++buffer;
55e303ae 2020
1c79356b
A
2021 if (--wordsLeft == 0) {
2022 // Next block
55e303ae 2023 buffer = currCache = NULL;
39236c6e
A
2024 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
2025 /*
2026 * If summary_block_scan is non-zero, then we must have
2027 * pulled a bitmap file block into core, and scanned through
2028 * the entire thing. Because we're in this loop, we are
2029						 * implicitly trusting that the summary table didn't have any knowledge
2030						 * about this particular bitmap block. As a result, update the summary table
2031						 * (lazily, now that we've scanned it) with our finding that
2032						 * this particular bitmap block is completely used up.
2033 */
2034 if (summary_block_scan != 0) {
2035 uint32_t summary_bit;
2036 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
2037 hfs_set_summary (hfsmp, summary_bit, 1);
2038 summary_block_scan = 0;
2039 }
2040 }
2041
0b4e3aa0
A
2042 err = ReleaseBitmapBlock(vcb, blockRef, false);
2043 if (err != noErr) goto Exit;
2044
55e303ae
A
2045 /*
2046 * Skip over metadata blocks.
2047 */
2048 if (!useMetaZone) {
2049 block = NextBitmapBlock(vcb, block);
55e303ae 2050 }
2d21ac55
A
2051 if (block >= endingBlock) {
2052 err = dskFulErr;
2053 goto Exit;
2054 }
2055
55e303ae 2056 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
1c79356b 2057 if (err != noErr) goto Exit;
55e303ae 2058 buffer = currCache;
39236c6e 2059 summary_block_scan = block;
0b4e3aa0 2060 wordsLeft = wordsPerBlock;
1c79356b 2061 }
1c79356b
A
2062 currentWord = SWAP_BE32 (*buffer);
2063 }
2064 }
2065
2066 // Did we get to the end of the bitmap before finding a free block?
2067 // If so, then couldn't allocate anything.
55e303ae 2068 if (block >= endingBlock) {
1c79356b
A
2069 err = dskFulErr;
2070 goto Exit;
2071 }
2072
39236c6e
A
2073
2074 /*
2075 * Don't move forward just yet. Verify that either one of the following
2076 * two conditions is true:
2077 * 1) journaling is not enabled
2078 * 2) block is not currently on any pending TRIM list.
2079 */
2080 if (hfsmp->jnl != NULL && (forceFlush == false)) {
2081 int recently_deleted = 0;
2082 uint32_t nextblk;
2083 err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
2084 if ((err == 0) && (recently_deleted)) {
2085
2086 /* release the bitmap block & unset currCache. we may jump past it. */
2087 err = ReleaseBitmapBlock(vcb, blockRef, false);
2088 currCache = NULL;
2089 if (err != noErr) {
2090 goto Exit;
2091 }
2092 /* set our start to nextblk, and re-do the search. */
2093 startingBlock = nextblk;
2094 goto restartSearchAny;
2095 }
2096 }
2097
2098
2099 // Return the first block in the allocated range
2100 *actualStartBlock = block;
0b4e3aa0 2101 dirty = true;
39236c6e 2102
1c79356b
A
2103 // If we could get the desired number of blocks before hitting endingBlock,
2104 // then adjust endingBlock so we won't keep looking. Ideally, the comparison
2105 // would be (block + maxBlocks) < endingBlock, but that could overflow. The
2106 // comparison below yields identical results, but without overflow.
2107 if (block < (endingBlock-maxBlocks)) {
2108 endingBlock = block + maxBlocks; // if we get this far, we've found enough
2109 }
b4c24cb9 2110
39236c6e
A
2111 /*
2112 * While loop 2:
2113 * Scan the bitmap, starting at 'currentWord' in the current
2114 * bitmap block. Continue iterating through the bitmap until
2115	 * either we hit an allocated block, or until we have accumulated
2116 * maxBlocks worth of bitmap.
2117 */
2118
2119 /* Continue until we see an allocated block */
2120 while ((currentWord & bitMask) == 0) {
1c79356b
A
2121 // Move to the next block. If no more, then exit.
2122 ++block;
39236c6e 2123 if (block == endingBlock) {
1c79356b 2124 break;
39236c6e 2125 }
1c79356b
A
2126
2127 // Next bit
2128 bitMask >>= 1;
2129 if (bitMask == 0) {
1c79356b
A
2130 // Next word
2131 bitMask = kHighBitInWordMask;
2132 ++buffer;
39236c6e 2133
1c79356b
A
2134 if (--wordsLeft == 0) {
2135 // Next block
55e303ae 2136 buffer = currCache = NULL;
39236c6e
A
2137
2138 /* We're only reading the bitmap here, so mark it as clean */
2139 err = ReleaseBitmapBlock(vcb, blockRef, false);
2140 if (err != noErr) {
2141 goto Exit;
2142 }
0b4e3aa0 2143
55e303ae
A
2144 /*
2145 * Skip over metadata blocks.
2146 */
2147 if (!useMetaZone) {
2d21ac55 2148 u_int32_t nextBlock;
55e303ae
A
2149 nextBlock = NextBitmapBlock(vcb, block);
2150 if (nextBlock != block) {
2151 goto Exit; /* allocation gap, so stop */
2152 }
2153 }
2154
39236c6e
A
2155 if (block >= endingBlock) {
2156 goto Exit;
2157 }
1c79356b 2158
39236c6e
A
2159 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
2160 if (err != noErr) {
2161 goto Exit;
b4c24cb9 2162 }
39236c6e 2163 buffer = currCache;
0b4e3aa0 2164 wordsLeft = wordsPerBlock;
1c79356b 2165 }
1c79356b
A
2166 currentWord = SWAP_BE32 (*buffer);
2167 }
2168 }
1c79356b
A
2169
2170Exit:
39236c6e
A
2171 if (currCache) {
2172 /* Release the bitmap reference prior to marking bits in-use */
2173 (void) ReleaseBitmapBlock(vcb, blockRef, false);
2174 currCache = NULL;
2175 }
2176
1c79356b
A
2177 if (err == noErr) {
2178 *actualNumBlocks = block - *actualStartBlock;
39236c6e 2179
060df5ea
A
2180 // sanity check
2181 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
2182 panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
2183 }
39236c6e
A
2184
2185 /* Mark the bits found as in-use */
2186 err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks);
1c79356b
A
2187 }
2188 else {
2189 *actualStartBlock = 0;
2190 *actualNumBlocks = 0;
2191 }
316670eb 2192
6d2010ae
A
2193 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2194 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
2195
1c79356b
A
2196 return err;
2197}
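/*
 * Self-contained sketch (not the driver code) of the first-fit scan
 * BlockAllocateAnyBitmap() performs above: walk the bitmap one word at a time
 * with a mask that starts at the high bit (bit 0 of the volume is the most
 * significant bit of word 0), find the first clear bit, then extend the run
 * until a set bit or max_blocks.  The real code additionally byte-swaps each
 * word (SWAP_BE32), pages bitmap blocks through the buffer cache, and checks
 * the pending-TRIM list; all of that is omitted here.
 */
#include <stdint.h>

#define BITS_PER_WORD	32u
#define HIGH_BIT	0x80000000u

/*
 * Returns the length of the free run found and its first block in *out_start,
 * or 0 if no free bit exists in [start, end).
 */
static uint32_t
find_free_run(const uint32_t *bitmap, uint32_t start, uint32_t end,
    uint32_t max_blocks, uint32_t *out_start)
{
	uint32_t block = start;

	/* Phase 1: skip over allocated (set) bits. */
	while (block < end &&
	    (bitmap[block / BITS_PER_WORD] & (HIGH_BIT >> (block % BITS_PER_WORD))))
		++block;

	if (block >= end)
		return 0;		/* everything in the range is allocated */

	*out_start = block;

	/* Phase 2: count clear bits, stopping at a set bit or at max_blocks. */
	uint32_t limit = (end - block > max_blocks) ? block + max_blocks : end;
	while (block < limit &&
	    !(bitmap[block / BITS_PER_WORD] & (HIGH_BIT >> (block % BITS_PER_WORD))))
		++block;

	return block - *out_start;
}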
2198
2199
0b4e3aa0
A
2200/*
2201_______________________________________________________________________
2202
2203Routine: BlockAllocateKnown
2204
2205Function: Try to allocate space from known free space in the free
2206 extent cache.
2207
2208Inputs:
2209 vcb Pointer to volume where space is to be allocated
2210 maxBlocks Maximum number of contiguous blocks to allocate
2211
2212Outputs:
2213 actualStartBlock First block of range allocated, or 0 if error
2214 actualNumBlocks Number of blocks allocated, or 0 if error
2215
2216Returns:
2217 dskFulErr Free extent cache is empty
2218_______________________________________________________________________
2219*/
b0d623f7 2220
0b4e3aa0 2221static OSErr BlockAllocateKnown(
39236c6e
A
2222 ExtendedVCB *vcb,
2223 u_int32_t maxBlocks,
2224 u_int32_t *actualStartBlock,
2225 u_int32_t *actualNumBlocks)
0b4e3aa0 2226{
2d21ac55 2227 OSErr err;
2d21ac55 2228 u_int32_t foundBlocks;
39236c6e 2229 struct hfsmount *hfsmp = VCBTOHFS(vcb);
b0d623f7 2230
6d2010ae
A
2231 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2232 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
2233
39236c6e 2234 hfs_lock_mount (hfsmp);
6d2010ae 2235 lck_spin_lock(&vcb->vcbFreeExtLock);
39236c6e
A
2236 if ( vcb->vcbFreeExtCnt == 0 ||
2237 vcb->vcbFreeExt[0].blockCount == 0) {
6d2010ae 2238 lck_spin_unlock(&vcb->vcbFreeExtLock);
39236c6e 2239 hfs_unlock_mount(hfsmp);
6d2010ae
A
2240 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2241 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
0b4e3aa0 2242 return dskFulErr;
d1ecb069 2243 }
6d2010ae 2244 lck_spin_unlock(&vcb->vcbFreeExtLock);
39236c6e 2245 hfs_unlock_mount(hfsmp);
0b4e3aa0 2246
6d2010ae
A
2247 lck_spin_lock(&vcb->vcbFreeExtLock);
2248
0b4e3aa0
A
2249	// Just grab up to maxBlocks of the first (largest) free extent.
2250 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
2251 foundBlocks = vcb->vcbFreeExt[0].blockCount;
2252 if (foundBlocks > maxBlocks)
2253 foundBlocks = maxBlocks;
2254 *actualNumBlocks = foundBlocks;
39236c6e 2255
316670eb 2256 lck_spin_unlock(&vcb->vcbFreeExtLock);
b0d623f7 2257
316670eb 2258 remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks);
39236c6e 2259
55e303ae 2260 // sanity check
2d21ac55 2261 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
0c530ab8 2262 {
2d21ac55 2263 printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
fe8ab488 2264 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
6601e61a
A
2265 *actualStartBlock = 0;
2266 *actualNumBlocks = 0;
2267 err = EIO;
2d21ac55
A
2268 }
2269 else
6601e61a
A
2270 {
2271 //
2272 // Now mark the found extent in the bitmap
2273 //
6d2010ae 2274 err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
6601e61a 2275 }
2d21ac55 2276
6d2010ae
A
2277 sanity_check_free_ext(vcb, 0);
2278
2279 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2280 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
b0d623f7 2281
2d21ac55 2282 return err;
0b4e3aa0
A
2283}
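/*
 * Minimal sketch (assumptions only) of what BlockAllocateKnown() does above
 * with the free extent cache: take up to max_blocks from the front of the
 * first (largest) cached extent.  The cache here is a plain array ordered
 * largest-first, mirroring vcbFreeExt[]; the locking, cache removal, and
 * on-disk bitmap update of the real code are omitted.
 */
#include <stdint.h>

struct free_extent { uint32_t start, count; };

static int
alloc_from_cache(const struct free_extent *cache, uint32_t cache_cnt,
    uint32_t max_blocks, uint32_t *out_start, uint32_t *out_count)
{
	if (cache_cnt == 0 || cache[0].count == 0)
		return -1;		/* cache empty: caller must scan the bitmap */

	*out_start = cache[0].start;
	*out_count = (cache[0].count > max_blocks) ? max_blocks : cache[0].count;

	/* The real code then removes this range from the cache and marks the
	 * corresponding bits as allocated in the on-disk bitmap. */
	return 0;
}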
2284
6d2010ae
A
2285/*
2286 * BlockMarkAllocated
2287 *
2288 * This is a wrapper function around the internal calls which will actually mark the blocks
2289 * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
2290 * this logic here to avoid callers having to deal with whether or not the red-black tree
2291 * is enabled.
2292 */
2293
6d2010ae 2294OSErr BlockMarkAllocated(
39236c6e
A
2295 ExtendedVCB *vcb,
2296 u_int32_t startingBlock,
2297 register u_int32_t numBlocks)
6d2010ae
A
2298{
2299 struct hfsmount *hfsmp;
6d2010ae 2300
39236c6e 2301 hfsmp = VCBTOHFS(vcb);
6d2010ae
A
2302
2303 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
39236c6e 2304
6d2010ae
A
2305}
2306
0b4e3aa0 2307
1c79356b
A
2308
2309/*
2310_______________________________________________________________________
2311
6d2010ae 2312Routine: BlockMarkAllocatedInternal
1c79356b
A
2313
2314Function: Mark a contiguous group of blocks as allocated (set in the
2315 bitmap). It assumes those bits are currently marked
6d2010ae
A
2316 deallocated (clear in the bitmap). Note that this function
2317 must be called regardless of whether or not the bitmap or
2318 tree-based allocator is used, as all allocations must correctly
2319 be marked on-disk. If the tree-based approach is running, then
2320 this will be done before the node is removed from the tree.
1c79356b
A
2321
2322Inputs:
2323 vcb Pointer to volume where space is to be allocated
2324 startingBlock First block number to mark as allocated
2325 numBlocks Number of blocks to mark as allocated
2326_______________________________________________________________________
2327*/
6d2010ae
A
2328static
2329OSErr BlockMarkAllocatedInternal (
39236c6e
A
2330 ExtendedVCB *vcb,
2331 u_int32_t startingBlock,
2332 register u_int32_t numBlocks)
1c79356b
A
2333{
2334 OSErr err;
2d21ac55
A
2335 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2336 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2337 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2338 u_int32_t firstBit; // Bit index within word of first bit to allocate
2339 u_int32_t numBits; // Number of bits in word to allocate
2340 u_int32_t *buffer = NULL;
b0d623f7 2341 uintptr_t blockRef;
2d21ac55
A
2342 u_int32_t bitsPerBlock;
2343 u_int32_t wordsPerBlock;
b4c24cb9
A
2344 // XXXdbg
2345 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1c79356b 2346
6d2010ae
A
2347 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2348 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
2349
39236c6e
A
2350 int force_flush = 0;
2351 /*
2352 * Since we are about to mark these bits as in-use
2353 * in the bitmap, decide if we need to alert the caller
2354 * that a journal flush might be appropriate. It's safe to
2355 * poke at the journal pointer here since we MUST have
2356 * called start_transaction by the time this function is invoked.
2357 * If the journal is enabled, then it will have taken the requisite
2358 * journal locks. If it is not enabled, then we have taken
2359 * a shared lock on the global lock.
2360 */
2361 if (hfsmp->jnl) {
2362 uint32_t ignore;
2363 err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore);
2364 if ((err == 0) && (force_flush)) {
2365 journal_request_immediate_flush (hfsmp->jnl);
2366 }
2367 }
2368
6d2010ae 2369 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
39236c6e 2370
1c79356b
A
2371 //
2372 // Pre-read the bitmap block containing the first word of allocation
2373 //
2374
0b4e3aa0 2375 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
1c79356b 2376 if (err != noErr) goto Exit;
1c79356b
A
2377 //
2378 // Initialize currentWord, and wordsLeft.
2379 //
2380 {
2d21ac55 2381 u_int32_t wordIndexInBlock;
39236c6e 2382
0b4e3aa0
A
2383 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2384 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 2385
0b4e3aa0 2386 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1c79356b 2387 currentWord = buffer + wordIndexInBlock;
0b4e3aa0 2388 wordsLeft = wordsPerBlock - wordIndexInBlock;
1c79356b 2389 }
39236c6e 2390
b4c24cb9
A
2391 // XXXdbg
2392 if (hfsmp->jnl) {
2393 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2394 }
2395
1c79356b
A
2396 //
2397 // If the first block to allocate doesn't start on a word
2398 // boundary in the bitmap, then treat that first word
2399 // specially.
2400 //
2401
2402 firstBit = startingBlock % kBitsPerWord;
2403 if (firstBit != 0) {
2404 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2405 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2406 if (numBits > numBlocks) {
2407 numBits = numBlocks; // entire allocation is inside this one word
2408 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2409 }
2410#if DEBUG_BUILD
2411 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
6d2010ae 2412 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
1c79356b
A
2413 }
2414#endif
2415 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2416 numBlocks -= numBits; // adjust number of blocks left to allocate
2417
2418 ++currentWord; // move to next word
2419 --wordsLeft; // one less word left in this block
2420 }
2421
2422 //
2423 // Allocate whole words (32 blocks) at a time.
2424 //
2425
2426 bitMask = kAllBitsSetInWord; // put this in a register for 68K
2427 while (numBlocks >= kBitsPerWord) {
2428 if (wordsLeft == 0) {
2429 // Read in the next bitmap block
0b4e3aa0 2430 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
39236c6e 2431
1c79356b 2432 buffer = NULL;
0b4e3aa0 2433 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2434 if (err != noErr) goto Exit;
0b4e3aa0
A
2435
2436 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2437 if (err != noErr) goto Exit;
2438
b4c24cb9
A
2439 // XXXdbg
2440 if (hfsmp->jnl) {
2441 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2442 }
2443
1c79356b
A
2444 // Readjust currentWord and wordsLeft
2445 currentWord = buffer;
0b4e3aa0 2446 wordsLeft = wordsPerBlock;
1c79356b
A
2447 }
2448#if DEBUG_BUILD
2449 if (*currentWord != 0) {
6d2010ae 2450 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
1c79356b
A
2451 }
2452#endif
2453 *currentWord = SWAP_BE32 (bitMask);
2454 numBlocks -= kBitsPerWord;
2455
2456 ++currentWord; // move to next word
2457 --wordsLeft; // one less word left in this block
2458 }
39236c6e 2459
1c79356b
A
2460 //
2461 // Allocate any remaining blocks.
2462 //
39236c6e 2463
1c79356b
A
2464 if (numBlocks != 0) {
2465 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2466 if (wordsLeft == 0) {
2467 // Read in the next bitmap block
0b4e3aa0 2468 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
39236c6e 2469
1c79356b 2470 buffer = NULL;
0b4e3aa0 2471 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2472 if (err != noErr) goto Exit;
0b4e3aa0
A
2473
2474 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2475 if (err != noErr) goto Exit;
2476
b4c24cb9
A
2477 // XXXdbg
2478 if (hfsmp->jnl) {
2479 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2480 }
39236c6e 2481
1c79356b
A
2482 // Readjust currentWord and wordsLeft
2483 currentWord = buffer;
0b4e3aa0 2484 wordsLeft = wordsPerBlock;
1c79356b
A
2485 }
2486#if DEBUG_BUILD
2487 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
6d2010ae 2488 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
1c79356b
A
2489 }
2490#endif
2491 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2492
2493 // No need to update currentWord or wordsLeft
2494 }
2495
2496Exit:
2497
0b4e3aa0
A
2498 if (buffer)
2499 (void)ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2500
6d2010ae
A
2501 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2502 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2503
1c79356b
A
2504 return err;
2505}
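/*
 * Self-contained sketch (not the driver code) of the three-phase bit setting
 * BlockMarkAllocatedInternal() performs above: a partial leading word masked
 * from the starting bit, whole 32-block words, then a partial trailing word.
 * The real bitmap is big-endian on disk and every modified buffer goes
 * through the journal; this sketch works on a host-order word array with no
 * journal.
 */
#include <stdint.h>

#define ALL_BITS_SET	0xFFFFFFFFu
#define BITS_PER_WORD	32u

static void
set_bits(uint32_t *bitmap, uint32_t start, uint32_t count)
{
	uint32_t *word = &bitmap[start / BITS_PER_WORD];
	uint32_t first_bit = start % BITS_PER_WORD;

	/* Phase 1: partial leading word. */
	if (first_bit != 0) {
		uint32_t mask  = ALL_BITS_SET >> first_bit;	/* bits first_bit..31 */
		uint32_t nbits = BITS_PER_WORD - first_bit;

		if (nbits > count) {			/* whole run fits in this word */
			nbits = count;
			mask &= ~(ALL_BITS_SET >> (first_bit + nbits));
		}
		*word++ |= mask;
		count -= nbits;
	}

	/* Phase 2: whole words, 32 blocks at a time. */
	while (count >= BITS_PER_WORD) {
		*word++ = ALL_BITS_SET;
		count -= BITS_PER_WORD;
	}

	/* Phase 3: partial trailing word (first 'count' bits of the word). */
	if (count != 0)
		*word |= ~(ALL_BITS_SET >> count);
}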
2506
6d2010ae
A
2507
2508/*
2509 * BlockMarkFree
2510 *
2511 * This is a wrapper function around the internal calls which will actually mark the blocks
2512 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
2513 * this logic here to avoid callers having to deal with whether or not the red-black tree
2514 * is enabled.
2515 *
2516 */
2517OSErr BlockMarkFree(
39236c6e
A
2518 ExtendedVCB *vcb,
2519 u_int32_t startingBlock,
2520 register u_int32_t numBlocks)
6d2010ae
A
2521{
2522 struct hfsmount *hfsmp;
2523 hfsmp = VCBTOHFS(vcb);
39236c6e 2524
6d2010ae 2525 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
6d2010ae
A
2526}
2527
2528
2529/*
2530 * BlockMarkFreeUnused
2531 *
2532 * Scan the bitmap block beyond end of current file system for bits
2533 * that are marked as used. If any of the bits are marked as used,
2534 * this function marks them free.
2535 *
2536 * Note: This was specifically written to mark all bits beyond
2537 * end of current file system during hfs_extendfs(), which makes
2538 * sure that all the new blocks added to the file system are
2539 * marked as free. We expect that all the blocks beyond end of
2540 * current file system are always marked as free, but there might
2541 * be cases where they are marked as used. This function assumes that
2542 * the number of blocks marked as used incorrectly are relatively
2543 * small, otherwise this can overflow journal transaction size
2544 * on certain file system configurations (example, large unused
2545 * bitmap with relatively small journal).
2546 *
2547 * Input:
2548 * startingBlock: First block of the range to mark unused
2549 * numBlocks: Number of blocks in the range to mark unused
2550 *
2551 * Returns: zero on success, non-zero on error.
2552 */
2553OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
2554{
2555 int error = 0;
2556 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2557 u_int32_t curNumBlocks;
2558 u_int32_t bitsPerBlock;
2559 u_int32_t lastBit;
2560
2561 /* Use the optimal bitmap I/O size instead of bitmap block size */
2562 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
2563
2564 /*
2565 * First clear any non bitmap allocation block aligned bits
2566 *
2567 * Calculate the first bit in the bitmap block next to
2568 * the bitmap block containing the bit for startingBlock.
2569 * Using this value, we calculate the total number of
2570 * bits to be marked unused from startingBlock to the
2571 * end of bitmap block containing startingBlock.
2572 */
2573 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
2574 curNumBlocks = lastBit - startingBlock;
2575 if (curNumBlocks > numBlocks) {
2576 curNumBlocks = numBlocks;
2577 }
2578 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2579 if (error) {
2580 return error;
2581 }
2582 startingBlock += curNumBlocks;
2583 numBlocks -= curNumBlocks;
2584
2585 /*
2586 * Check a full bitmap block for any 'used' bit. If any bit is used,
2587 * mark all the bits only in that bitmap block as free. This ensures
2588 * that we do not write unmodified bitmap blocks and do not
2589 * overwhelm the journal.
2590 *
2591 * The code starts by checking full bitmap block at a time, and
2592 * marks entire bitmap block as free only if any bit in that bitmap
2593 * block is marked as used. In the end, it handles the last bitmap
2594 * block which might be partially full by only checking till the
2595 * caller-specified last bit and if any bit is set, only mark that
2596 * range as free.
2597 */
2598 while (numBlocks) {
2599 if (numBlocks >= bitsPerBlock) {
2600 curNumBlocks = bitsPerBlock;
2601 } else {
2602 curNumBlocks = numBlocks;
2603 }
2604 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
2605 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2606 if (error) {
2607 return error;
2608 }
2609 }
2610 startingBlock += curNumBlocks;
2611 numBlocks -= curNumBlocks;
2612 }
2613
2614 return error;
2615}
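/*
 * Worked sketch (assumptions only) of the alignment math used by
 * BlockMarkFreeUnused() above.  The first chunk runs from startingBlock up to
 * the next bitmap-block boundary; every later chunk is exactly one bitmap
 * block (vcbVBMIOSize * 8 bits) except the final partial one.  Splitting the
 * work this way keeps each journal transaction small.  The real code also
 * skips a full chunk entirely when hfs_isallocated() says it is already free;
 * the printf below is just a placeholder for that per-chunk work.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t bits_per_block = 4096 * 8;	/* e.g. a 4 KiB bitmap I/O size */
	uint32_t starting_block = 100000;	/* hypothetical first block     */
	uint32_t num_blocks     = 200000;	/* hypothetical range length    */

	/* Round starting_block up to the next bitmap-block boundary. */
	uint32_t last_bit = ((starting_block + (bits_per_block - 1)) / bits_per_block)
	    * bits_per_block;
	uint32_t chunk = last_bit - starting_block;
	if (chunk > num_blocks)
		chunk = num_blocks;

	while (num_blocks) {
		if (chunk)
			printf("clear %u blocks starting at %u\n", chunk, starting_block);
		starting_block += chunk;
		num_blocks     -= chunk;
		chunk = (num_blocks >= bits_per_block) ? bits_per_block : num_blocks;
	}
	return 0;
}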
2616
2617/*
2618_______________________________________________________________________
2619
2620Routine: BlockMarkFreeInternal
2621
2622Function: Mark a contiguous group of blocks as free (clear in the
2623 bitmap). It assumes those bits are currently marked
2624 allocated (set in the bitmap).
2625
2626Inputs:
2627 vcb Pointer to volume where space is to be freed
2628 startingBlock First block number to mark as freed
2629 numBlocks Number of blocks to mark as freed
2630	do_validate 	If true, validate the blocks being deallocated:
2631			check that they are within totalBlocks for the
2632			current volume and that they were allocated
2633			before they are marked free.
2634_______________________________________________________________________
2635*/
2636static
2637OSErr BlockMarkFreeInternal(
39236c6e
A
2638 ExtendedVCB *vcb,
2639 u_int32_t startingBlock_in,
2640 register u_int32_t numBlocks_in,
2641 Boolean do_validate)
6d2010ae 2642{
316670eb 2643 OSErr err;
6d2010ae
A
2644 u_int32_t startingBlock = startingBlock_in;
2645 u_int32_t numBlocks = numBlocks_in;
316670eb
A
2646 uint32_t unmapStart = startingBlock_in;
2647 uint32_t unmapCount = numBlocks_in;
2648 uint32_t wordIndexInBlock;
2649 u_int32_t *currentWord; // Pointer to current word within bitmap block
2650 u_int32_t wordsLeft; // Number of words left in this bitmap block
2651 u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2652 u_int32_t currentBit; // Bit index within word of current bit to allocate
2653 u_int32_t numBits; // Number of bits in word to allocate
2654 u_int32_t *buffer = NULL;
2655 uintptr_t blockRef;
2656 u_int32_t bitsPerBlock;
2657 u_int32_t wordsPerBlock;
39236c6e 2658 // XXXdbg
6d2010ae 2659 struct hfsmount *hfsmp = VCBTOHFS(vcb);
39236c6e 2660
6d2010ae
A
2661 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2662 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
2663
2664 /*
2665 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
2666 * need to be able to free blocks being relocated during hfs_truncatefs.
2667 */
2668 if ((do_validate == true) &&
39236c6e 2669 (startingBlock + numBlocks > vcb->totalBlocks)) {
6d2010ae
A
2670 if (ALLOC_DEBUG) {
2671 panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
2672 }
39236c6e 2673
6d2010ae 2674 printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
fe8ab488 2675 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
6d2010ae
A
2676 err = EIO;
2677 goto Exit;
2678 }
39236c6e 2679
6d2010ae
A
2680 //
2681 // Pre-read the bitmap block containing the first word of allocation
2682 //
39236c6e 2683
6d2010ae
A
2684 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2685 if (err != noErr) goto Exit;
2686 // XXXdbg
2687 if (hfsmp->jnl) {
2688 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2689 }
39236c6e 2690
6d2010ae 2691 //
316670eb 2692 // Figure out how many bits and words per bitmap block.
6d2010ae 2693 //
316670eb
A
2694 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2695 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
2696 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
39236c6e 2697
316670eb
A
2698 //
2699 // Look for a range of free blocks immediately before startingBlock
2700 // (up to the start of the current bitmap block). Set unmapStart to
2701 // the first free block.
2702 //
2703 currentWord = buffer + wordIndexInBlock;
2704 currentBit = startingBlock % kBitsPerWord;
2705 bitMask = kHighBitInWordMask >> currentBit;
2706 while (true) {
2707 // Move currentWord/bitMask back by one bit
2708 bitMask <<= 1;
2709 if (bitMask == 0) {
2710 if (--currentWord < buffer)
2711 break;
2712 bitMask = kLowBitInWordMask;
2713 }
39236c6e 2714
316670eb
A
2715 if (*currentWord & SWAP_BE32(bitMask))
2716 break; // Found an allocated block. Stop searching.
2717 --unmapStart;
2718 ++unmapCount;
1c79356b 2719 }
39236c6e 2720
1c79356b
A
2721 //
2722 // If the first block to free doesn't start on a word
2723 // boundary in the bitmap, then treat that first word
2724 // specially.
2725 //
39236c6e 2726
316670eb
A
2727 currentWord = buffer + wordIndexInBlock;
2728 wordsLeft = wordsPerBlock - wordIndexInBlock;
2729 currentBit = startingBlock % kBitsPerWord;
2730 if (currentBit != 0) {
2731 bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit
2732 numBits = kBitsPerWord - currentBit; // number of remaining bits in this word
1c79356b
A
2733 if (numBits > numBlocks) {
2734 numBits = numBlocks; // entire allocation is inside this one word
316670eb 2735 bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last
1c79356b 2736 }
6d2010ae 2737 if ((do_validate == true) &&
39236c6e 2738 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
55e303ae 2739 goto Corruption;
1c79356b 2740 }
1c79356b
A
2741 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2742 numBlocks -= numBits; // adjust number of blocks left to free
39236c6e 2743
1c79356b
A
2744 ++currentWord; // move to next word
2745 --wordsLeft; // one less word left in this block
2746 }
39236c6e 2747
1c79356b 2748 //
0b4e3aa0 2749 // Free whole words (32 blocks) at a time.
1c79356b 2750 //
39236c6e 2751
1c79356b
A
2752 while (numBlocks >= kBitsPerWord) {
2753 if (wordsLeft == 0) {
2754 // Read in the next bitmap block
0b4e3aa0 2755 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
39236c6e 2756
1c79356b 2757 buffer = NULL;
0b4e3aa0 2758 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2759 if (err != noErr) goto Exit;
39236c6e 2760
0b4e3aa0
A
2761 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2762 if (err != noErr) goto Exit;
39236c6e 2763
b4c24cb9
A
2764 // XXXdbg
2765 if (hfsmp->jnl) {
2766 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2767 }
39236c6e 2768
1c79356b
A
2769 // Readjust currentWord and wordsLeft
2770 currentWord = buffer;
0b4e3aa0 2771 wordsLeft = wordsPerBlock;
1c79356b 2772 }
6d2010ae 2773 if ((do_validate == true) &&
39236c6e 2774 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
55e303ae 2775 goto Corruption;
1c79356b 2776 }
1c79356b
A
2777 *currentWord = 0; // clear the entire word
2778 numBlocks -= kBitsPerWord;
39236c6e 2779
1c79356b 2780 ++currentWord; // move to next word
316670eb 2781 --wordsLeft; // one less word left in this block
1c79356b 2782 }
39236c6e 2783
1c79356b 2784 //
0b4e3aa0 2785 // Free any remaining blocks.
1c79356b 2786 //
39236c6e 2787
1c79356b
A
2788 if (numBlocks != 0) {
2789 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2790 if (wordsLeft == 0) {
2791 // Read in the next bitmap block
0b4e3aa0 2792 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
39236c6e 2793
1c79356b 2794 buffer = NULL;
0b4e3aa0 2795 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2796 if (err != noErr) goto Exit;
39236c6e 2797
0b4e3aa0
A
2798 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2799 if (err != noErr) goto Exit;
39236c6e 2800
b4c24cb9
A
2801 // XXXdbg
2802 if (hfsmp->jnl) {
2803 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2804 }
39236c6e 2805
1c79356b
A
2806 // Readjust currentWord and wordsLeft
2807 currentWord = buffer;
0b4e3aa0 2808 wordsLeft = wordsPerBlock;
1c79356b 2809 }
6d2010ae 2810 if ((do_validate == true) &&
39236c6e 2811 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
55e303ae 2812 goto Corruption;
1c79356b 2813 }
1c79356b 2814 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
39236c6e 2815
1c79356b
A
2816 // No need to update currentWord or wordsLeft
2817 }
39236c6e 2818
316670eb
A
2819 //
2820 // Look for a range of free blocks immediately after the range we just freed
2821 // (up to the end of the current bitmap block).
2822 //
2823 wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord;
2824 wordsLeft = wordsPerBlock - wordIndexInBlock;
2825 currentWord = buffer + wordIndexInBlock;
2826 currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord;
2827 bitMask = kHighBitInWordMask >> currentBit;
2828 while (true) {
2829 // Move currentWord/bitMask/wordsLeft forward one bit
2830 bitMask >>= 1;
2831 if (bitMask == 0) {
2832 if (--wordsLeft == 0)
2833 break;
2834 ++currentWord;
2835 bitMask = kHighBitInWordMask;
2836 }
39236c6e 2837
316670eb
A
2838 if (*currentWord & SWAP_BE32(bitMask))
2839 break; // Found an allocated block. Stop searching.
2840 ++unmapCount;
2841 }
39236c6e 2842
1c79356b 2843Exit:
39236c6e 2844
0b4e3aa0
A
2845 if (buffer)
2846 (void)ReleaseBitmapBlock(vcb, blockRef, true);
39236c6e 2847
6d2010ae 2848 if (err == noErr) {
316670eb 2849 hfs_unmap_free_extent(vcb, unmapStart, unmapCount);
593a1d5f
A
2850 }
2851
6d2010ae
A
2852 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2853 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
593a1d5f 2854
1c79356b 2855 return err;
39236c6e 2856
55e303ae
A
2857Corruption:
2858#if DEBUG_BUILD
6d2010ae 2859 panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
55e303ae 2860#else
316670eb 2861 printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
fe8ab488 2862 hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED);
55e303ae
A
2863 err = EIO;
2864 goto Exit;
2865#endif
1c79356b
A
2866}
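/*
 * Self-contained sketch (assumptions only) of the extra step
 * BlockMarkFreeInternal() performs above beyond clearing bits: before handing
 * an extent to hfs_unmap_free_extent() it widens the range to cover any
 * already-free neighbours, scanning backwards from the start and forwards
 * from the end.  The real code is big-endian, journaled, and bounded by the
 * current bitmap block; this sketch only shows the neighbour scan over a
 * host-order word array.
 */
#include <stdint.h>

#define BITS_PER_WORD	32u
#define HIGH_BIT	0x80000000u

static int
bit_is_set(const uint32_t *bm, uint32_t bit)
{
	return (bm[bit / BITS_PER_WORD] & (HIGH_BIT >> (bit % BITS_PER_WORD))) != 0;
}

/* Grow [*unmap_start, *unmap_start + *unmap_count) over adjacent free bits. */
static void
widen_unmap_range(const uint32_t *bm, uint32_t total_bits,
    uint32_t *unmap_start, uint32_t *unmap_count)
{
	/* Scan backwards over free blocks immediately before the range. */
	while (*unmap_start > 0 && !bit_is_set(bm, *unmap_start - 1)) {
		--*unmap_start;
		++*unmap_count;
	}
	/* Scan forwards over free blocks immediately after the range. */
	while (*unmap_start + *unmap_count < total_bits &&
	    !bit_is_set(bm, *unmap_start + *unmap_count)) {
		++*unmap_count;
	}
}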
2867
316670eb 2868
1c79356b
A
2869/*
2870_______________________________________________________________________
2871
2872Routine: BlockFindContiguous
2873
2874Function: Find a contiguous range of blocks that are free (bits
2875 clear in the bitmap). If a contiguous range of the
2876 minimum size can't be found, an error will be returned.
6d2010ae
A
2877 This is only needed to support the bitmap-scanning logic,
2878 as the red-black tree should be able to do this by internally
2879 searching its tree.
1c79356b
A
2880
2881Inputs:
2882 vcb Pointer to volume where space is to be allocated
2883 startingBlock Preferred first block of range
2884 endingBlock Last possible block in range + 1
2885 minBlocks Minimum number of blocks needed. Must be > 0.
2886 maxBlocks Maximum (ideal) number of blocks desired
55e303ae 2887 useMetaZone OK to dip into metadata allocation zone
1c79356b
A
2888
2889Outputs:
2890 actualStartBlock First block of range found, or 0 if error
2891 actualNumBlocks Number of blocks found, or 0 if error
1c79356b 2892
0b4e3aa0
A
2893Returns:
2894 noErr Found at least minBlocks contiguous
2895 dskFulErr No contiguous space found, or all less than minBlocks
2896_______________________________________________________________________
1c79356b
A
2897*/
2898
2899static OSErr BlockFindContiguous(
39236c6e
A
2900 ExtendedVCB *vcb,
2901 u_int32_t startingBlock,
2902 u_int32_t endingBlock,
2903 u_int32_t minBlocks,
2904 u_int32_t maxBlocks,
2905 Boolean useMetaZone,
2906 Boolean trustSummary,
2907 u_int32_t *actualStartBlock,
2908 u_int32_t *actualNumBlocks)
1c79356b
A
2909{
2910 OSErr err;
2d21ac55
A
2911 register u_int32_t currentBlock; // Block we're currently looking at.
2912 u_int32_t firstBlock; // First free block in current extent.
2913 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
2914 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
2915 u_int32_t *buffer = NULL;
2916 register u_int32_t *currentWord;
2917 register u_int32_t bitMask;
2918 register u_int32_t wordsLeft;
2919 register u_int32_t tempWord;
b0d623f7 2920 uintptr_t blockRef;
2d21ac55 2921 u_int32_t wordsPerBlock;
6d2010ae 2922 u_int32_t updated_free_extent = 0;
39236c6e 2923 struct hfsmount *hfsmp = (struct hfsmount*) vcb;
6d2010ae
A
2924
2925 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2926 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
1c79356b 2927
743b1565
A
2928 /*
2929 * When we're skipping the metadata zone and the start/end
2930 * range overlaps with the metadata zone then adjust the
2931 * start to be outside of the metadata zone. If the range
2932 * is entirely inside the metadata zone then we can deny the
2933 * request (dskFulErr).
2934 */
2935 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
2936 if (startingBlock <= vcb->hfs_metazone_end) {
2937 if (endingBlock > (vcb->hfs_metazone_end + 2))
2938 startingBlock = vcb->hfs_metazone_end + 1;
2939 else
2940 goto DiskFull;
2941 }
4a249263
A
2942 }
2943
0b4e3aa0
A
2944 if ((endingBlock - startingBlock) < minBlocks)
2945 {
1c79356b
A
2946 // The set of blocks we're checking is smaller than the minimum number
2947 // of blocks, so we couldn't possibly find a good range.
0b4e3aa0 2948 goto DiskFull;
1c79356b 2949 }
0b4e3aa0
A
2950
2951 stopBlock = endingBlock - minBlocks + 1;
2952 currentBlock = startingBlock;
9bccf70c 2953 firstBlock = 0;
55e303ae
A
2954
2955 /*
2956 * Skip over metadata blocks.
2957 */
2958 if (!useMetaZone)
2959 currentBlock = NextBitmapBlock(vcb, currentBlock);
2960
39236c6e
A
2961 /*
2962 * Use the summary table if we can. Skip over any totally
2963 * allocated blocks. currentBlock should now point to the first
2964 * block beyond the metadata zone if the metazone allocations are not
2965 * allowed in this invocation.
2966 */
2967 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
2968 uint32_t suggestion;
2969 if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
2970 currentBlock = suggestion;
2971 }
2972 }
2973
2974
1c79356b 2975 //
0b4e3aa0 2976 // Pre-read the first bitmap block.
1c79356b 2977 //
0b4e3aa0
A
2978 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
2979 if ( err != noErr ) goto ErrorExit;
1c79356b 2980
1c79356b 2981 //
0b4e3aa0 2982 // Figure out where currentBlock is within the buffer.
1c79356b 2983 //
0b4e3aa0 2984 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 2985
55e303ae 2986 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
0b4e3aa0
A
2987 currentWord = buffer + wordsLeft;
2988 wordsLeft = wordsPerBlock - wordsLeft;
39236c6e
A
2989
2990 /*
2991 * This outer do-while loop is the main body of this function. Its job is
2992 * to search through the blocks (until we hit 'stopBlock'), and iterate
2993 * through swaths of allocated bitmap until it finds free regions.
2994 */
2995
0b4e3aa0 2996 do
1c79356b 2997 {
0b4e3aa0 2998 foundBlocks = 0;
39236c6e
A
2999 uint32_t summary_block_scan = 0;
3000 /*
3001 * Inner while loop 1:
3002 * Look for free blocks, skipping over allocated ones.
3003 *
3004 * Initialization starts with checking the initial partial word
3005 * if applicable.
3006 */
0b4e3aa0
A
3007 bitMask = currentBlock & kBitsWithinWordMask;
3008 if (bitMask)
3009 {
9bccf70c 3010 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
0b4e3aa0
A
3011 bitMask = kHighBitInWordMask >> bitMask;
3012 while (tempWord & bitMask)
1c79356b 3013 {
0b4e3aa0
A
3014 bitMask >>= 1;
3015 ++currentBlock;
1c79356b 3016 }
1c79356b 3017
0b4e3aa0
A
3018 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
3019 if (bitMask)
3020 goto FoundUnused;
1c79356b 3021
0b4e3aa0
A
3022 // Didn't find any unused bits, so we're done with this word.
3023 ++currentWord;
3024 --wordsLeft;
1c79356b 3025 }
1c79356b 3026
0b4e3aa0
A
3027 //
3028 // Check whole words
3029 //
3030 while (currentBlock < stopBlock)
3031 {
3032 // See if it's time to read another block.
3033 if (wordsLeft == 0)
1c79356b 3034 {
1c79356b 3035 buffer = NULL;
39236c6e
A
3036 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3037 /*
3038 * If summary_block_scan is non-zero, then we must have
3039 * pulled a bitmap file block into core, and scanned through
3040 * the entire thing. Because we're in this loop, we are
 3041 * implicitly trusting that the summary table didn't have any knowledge
 3042 * about this particular block. As a result, update the summary table
3043 * (lazily, now that we've scanned it) with our findings that
3044 * this particular block is completely used up.
3045 */
3046 if (summary_block_scan != 0) {
3047 uint32_t summary_bit;
3048 (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
3049 hfs_set_summary (hfsmp, summary_bit, 1);
3050 summary_block_scan = 0;
3051 }
3052 }
0b4e3aa0
A
3053 err = ReleaseBitmapBlock(vcb, blockRef, false);
3054 if (err != noErr) goto ErrorExit;
3055
55e303ae
A
3056 /*
3057 * Skip over metadata blocks.
3058 */
3059 if (!useMetaZone) {
3060 currentBlock = NextBitmapBlock(vcb, currentBlock);
4a249263
A
3061 if (currentBlock >= stopBlock) {
3062 goto LoopExit;
3063 }
55e303ae
A
3064 }
3065
39236c6e
A
3066 /* Skip over fully allocated bitmap blocks if we can */
3067 if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
3068 uint32_t suggestion;
3069 if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) {
3070 if (suggestion < stopBlock) {
3071 currentBlock = suggestion;
3072 }
3073 }
3074 }
3075
0b4e3aa0
A
3076 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
3077 if ( err != noErr ) goto ErrorExit;
39236c6e
A
3078
3079 /*
3080 * Set summary_block_scan to be the block we just read into the block cache.
3081 *
3082 * At this point, we've just read an allocation block worth of bitmap file
3083 * into the buffer above, but we don't know if it is completely allocated or not.
3084 * If we find that it is completely allocated/full then we will jump
3085 * through this loop again and set the appropriate summary bit as fully allocated.
3086 */
3087 summary_block_scan = currentBlock;
0b4e3aa0
A
3088 currentWord = buffer;
3089 wordsLeft = wordsPerBlock;
1c79356b 3090 }
39236c6e 3091
0b4e3aa0 3092 // See if any of the bits are clear
9bccf70c 3093 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
1c79356b 3094 {
0b4e3aa0
A
3095 // Figure out which bit is clear
3096 bitMask = kHighBitInWordMask;
3097 while (tempWord & bitMask)
3098 {
3099 bitMask >>= 1;
3100 ++currentBlock;
3101 }
39236c6e 3102
0b4e3aa0 3103 break; // Found the free bit; break out to FoundUnused.
1c79356b 3104 }
1c79356b 3105
0b4e3aa0
A
3106 // Keep looking at the next word
3107 currentBlock += kBitsPerWord;
3108 ++currentWord;
3109 --wordsLeft;
3110 }
1c79356b 3111
0b4e3aa0
A
3112FoundUnused:
3113 // Make sure the unused bit is early enough to use
3114 if (currentBlock >= stopBlock)
1c79356b 3115 {
0b4e3aa0 3116 break;
1c79356b 3117 }
1c79356b 3118
0b4e3aa0
A
3119 // Remember the start of the extent
3120 firstBlock = currentBlock;
1c79356b 3121
1c79356b 3122
39236c6e
A
3123 /*
3124 * Inner while loop 2:
3125 * We get here if we find a free block. Count the number
3126 * of contiguous free blocks observed.
3127 *
3128 * Initialization starts with checking the initial partial word
3129 * if applicable.
3130 */
3131 bitMask = currentBlock & kBitsWithinWordMask;
3132 if (bitMask)
1c79356b 3133 {
9bccf70c 3134 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
0b4e3aa0
A
3135 bitMask = kHighBitInWordMask >> bitMask;
3136 while (bitMask && !(tempWord & bitMask))
1c79356b 3137 {
0b4e3aa0
A
3138 bitMask >>= 1;
3139 ++currentBlock;
1c79356b 3140 }
0b4e3aa0
A
3141
3142 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3143 if (bitMask)
3144 goto FoundUsed;
3145
3146 // Didn't find any used bits, so we're done with this word.
3147 ++currentWord;
3148 --wordsLeft;
3149 }
39236c6e 3150
0b4e3aa0
A
3151 //
3152 // Check whole words
3153 //
3154 while (currentBlock < endingBlock)
3155 {
3156 // See if it's time to read another block.
3157 if (wordsLeft == 0)
1c79356b 3158 {
0b4e3aa0
A
3159 buffer = NULL;
3160 err = ReleaseBitmapBlock(vcb, blockRef, false);
3161 if (err != noErr) goto ErrorExit;
3162
55e303ae
A
3163 /*
3164 * Skip over metadata blocks.
3165 */
3166 if (!useMetaZone) {
2d21ac55 3167 u_int32_t nextBlock;
55e303ae
A
3168
3169 nextBlock = NextBitmapBlock(vcb, currentBlock);
3170 if (nextBlock != currentBlock) {
4a249263 3171 goto LoopExit; /* allocation gap, so stop */
55e303ae
A
3172 }
3173 }
3174
0b4e3aa0
A
3175 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
3176 if ( err != noErr ) goto ErrorExit;
39236c6e 3177
0b4e3aa0
A
3178 currentWord = buffer;
3179 wordsLeft = wordsPerBlock;
1c79356b 3180 }
39236c6e 3181
0b4e3aa0 3182 // See if any of the bits are set
9bccf70c 3183 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
1c79356b 3184 {
0b4e3aa0 3185 // Figure out which bit is set
1c79356b 3186 bitMask = kHighBitInWordMask;
0b4e3aa0 3187 while (!(tempWord & bitMask))
1c79356b 3188 {
0b4e3aa0
A
3189 bitMask >>= 1;
3190 ++currentBlock;
1c79356b 3191 }
39236c6e 3192
0b4e3aa0 3193 break; // Found the used bit; break out to FoundUsed.
1c79356b 3194 }
0b4e3aa0
A
3195
3196 // Keep looking at the next word
3197 currentBlock += kBitsPerWord;
3198 ++currentWord;
3199 --wordsLeft;
39236c6e 3200
0b4e3aa0
A
3201 // If we found at least maxBlocks, we can quit early.
3202 if ((currentBlock - firstBlock) >= maxBlocks)
3203 break;
1c79356b 3204 }
1c79356b 3205
0b4e3aa0
A
3206FoundUsed:
3207 // Make sure we didn't run out of bitmap looking for a used block.
3208 // If so, pin to the end of the bitmap.
3209 if (currentBlock > endingBlock)
3210 currentBlock = endingBlock;
3211
3212 // Figure out how many contiguous free blocks there were.
3213 // Pin the answer to maxBlocks.
3214 foundBlocks = currentBlock - firstBlock;
3215 if (foundBlocks > maxBlocks)
3216 foundBlocks = maxBlocks;
3217 if (foundBlocks >= minBlocks)
3218 break; // Found what we needed!
3219
39236c6e
A
3220 /*
 3221 * We did not find the total number of blocks we were looking for, but
3222 * add this free block run to our free extent cache list, if possible.
6d2010ae 3223 */
39236c6e
A
3224 if (hfsmp->jnl == NULL) {
3225 /* If there is no journal, go ahead and add to the free ext cache. */
3226 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3227 }
3228 else {
3229 /*
3230 * If journaled, only add to the free extent cache if this block is not
3231 * waiting for a TRIM to complete; that implies that the transaction that freed it
3232 * has not yet been committed to stable storage.
3233 */
3234 int recently_deleted = 0;
3235 uint32_t nextblock;
3236 err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
3237 (uint64_t)foundBlocks, &recently_deleted, &nextblock);
3238 if ((err) || (recently_deleted == 0)) {
3239 /* if we hit an error, or the blocks not recently freed, go ahead and insert it */
3240 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
3241 }
3242 err = 0;
3243 }
b0d623f7 3244
0b4e3aa0 3245 } while (currentBlock < stopBlock);
4a249263 3246LoopExit:
0b4e3aa0
A
3247
3248 // Return the outputs.
3249 if (foundBlocks < minBlocks)
3250 {
3251DiskFull:
3252 err = dskFulErr;
3253ErrorExit:
3254 *actualStartBlock = 0;
3255 *actualNumBlocks = 0;
3256 }
3257 else
3258 {
3259 err = noErr;
3260 *actualStartBlock = firstBlock;
3261 *actualNumBlocks = foundBlocks;
2d21ac55
A
3262 /*
3263 * Sanity check for overflow
3264 */
3265 if ((firstBlock + foundBlocks) > vcb->allocLimit) {
b0d623f7 3266 panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
39236c6e
A
3267 vcb->vcbVN, startingBlock, endingBlock, currentBlock,
3268 firstBlock, stopBlock, minBlocks, foundBlocks);
2d21ac55 3269 }
0b4e3aa0 3270 }
39236c6e 3271
6d2010ae 3272 if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
b0d623f7
A
3273 int i;
3274 u_int32_t min_start = vcb->totalBlocks;
39236c6e 3275
b0d623f7
A
3276 // set the nextAllocation pointer to the smallest free block number
 3277 // we've seen, so on the next mount we won't rescan unnecessarily
6d2010ae 3278 lck_spin_lock(&vcb->vcbFreeExtLock);
b0d623f7
A
3279 for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
3280 if (vcb->vcbFreeExt[i].startBlock < min_start) {
3281 min_start = vcb->vcbFreeExt[i].startBlock;
3282 }
3283 }
6d2010ae 3284 lck_spin_unlock(&vcb->vcbFreeExtLock);
b0d623f7
A
3285 if (min_start != vcb->totalBlocks) {
3286 if (min_start < vcb->nextAllocation) {
3287 vcb->nextAllocation = min_start;
3288 }
3289 if (min_start < vcb->sparseAllocation) {
3290 vcb->sparseAllocation = min_start;
3291 }
3292 }
3293 }
39236c6e 3294
0b4e3aa0
A
3295 if (buffer)
3296 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1c79356b 3297
6d2010ae
A
3298 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
3299 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
b0d623f7 3300
1c79356b
A
3301 return err;
3302}
3303
6d2010ae 3304
6d2010ae
A
3305/*
3306 * Count number of bits set in the given 32-bit unsigned number
3307 *
3308 * Returns:
3309 * Number of bits set
3310 */
3311static int num_bits_set(u_int32_t num)
3312{
3313 int count;
3314
3315 for (count = 0; num; count++) {
3316 num &= num - 1;
3317 }
3318
3319 return count;
3320}
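/*
 * Illustrative note (not part of the original sources): the loop above
 * uses the classic n &= (n - 1) trick, which clears the lowest set bit
 * on every pass, so it iterates once per set bit rather than once per
 * bit position.  A few sample values:
 *
 *	num_bits_set(0x00000000) == 0
 *	num_bits_set(0x80000001) == 2	// 0x80000001 -> 0x80000000 -> 0
 *	num_bits_set(0xFFFFFFFF) == 32
 */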
3321
3322/*
3323 * For a given range of blocks, find the total number of blocks
3324 * allocated. If 'stop_on_first' is true, it stops as soon as it
3325 * encounters the first allocated block. This option is useful
3326 * to determine if any block is allocated or not.
3327 *
3328 * Inputs:
3329 * startingBlock First allocation block number of the range to be scanned.
3330 * numBlocks Total number of blocks that need to be scanned.
3331 * stop_on_first Stop the search after the first allocated block is found.
3332 *
3333 * Output:
3334 * allocCount Total number of allocation blocks allocated in the given range.
3335 *
3336 * On error, it is the number of allocated blocks found
3337 * before the function got an error.
3338 *
3339 * If 'stop_on_first' is set,
3340 * allocCount = 1 if any allocated block was found.
3341 * allocCount = 0 if no allocated block was found.
3342 *
3343 * Returns:
3344 * 0 on success, non-zero on failure.
3345 */
3346static int
3347hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
3348 u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
3349{
3350 u_int32_t *currentWord; // Pointer to current word within bitmap block
3351 u_int32_t wordsLeft; // Number of words left in this bitmap block
3352 u_int32_t bitMask; // Word with given bits already set (ready to test)
 3353 u_int32_t firstBit; // Bit index within word of first bit to check
 3354 u_int32_t numBits; // Number of bits in word to check
3355 u_int32_t *buffer = NULL;
3356 uintptr_t blockRef;
3357 u_int32_t bitsPerBlock;
3358 u_int32_t wordsPerBlock;
39236c6e 3359 u_int32_t blockCount = 0;
6d2010ae
A
3360 int error;
3361
3362 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3363 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
3364
3365 /*
3366 * Pre-read the bitmap block containing the first word of allocation
3367 */
3368 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3369 if (error)
3370 goto JustReturn;
3371
3372 /*
3373 * Initialize currentWord, and wordsLeft.
3374 */
3375 {
3376 u_int32_t wordIndexInBlock;
39236c6e 3377
6d2010ae
A
3378 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
3379 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
3380
3381 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
3382 currentWord = buffer + wordIndexInBlock;
3383 wordsLeft = wordsPerBlock - wordIndexInBlock;
3384 }
39236c6e 3385
6d2010ae
A
3386 /*
3387 * First test any non word aligned bits.
3388 */
3389 firstBit = startingBlock % kBitsPerWord;
3390 if (firstBit != 0) {
3391 bitMask = kAllBitsSetInWord >> firstBit;
3392 numBits = kBitsPerWord - firstBit;
3393 if (numBits > numBlocks) {
3394 numBits = numBlocks;
3395 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
3396 }
3397 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
3398 if (stop_on_first) {
3399 blockCount = 1;
3400 goto Exit;
3401 }
3402 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
3403 }
3404 numBlocks -= numBits;
3405 ++currentWord;
3406 --wordsLeft;
91447636
A
3407 }
3408
3409 /*
3410 * Test whole words (32 blocks) at a time.
3411 */
3412 while (numBlocks >= kBitsPerWord) {
3413 if (wordsLeft == 0) {
3414 /* Read in the next bitmap block. */
3415 startingBlock += bitsPerBlock;
39236c6e 3416
91447636
A
3417 buffer = NULL;
3418 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3419 if (error) goto Exit;
3420
3421 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3422 if (error) goto Exit;
3423
3424 /* Readjust currentWord and wordsLeft. */
3425 currentWord = buffer;
3426 wordsLeft = wordsPerBlock;
3427 }
3428 if (*currentWord != 0) {
6d2010ae
A
3429 if (stop_on_first) {
3430 blockCount = 1;
3431 goto Exit;
3432 }
3433 blockCount += num_bits_set(*currentWord);
91447636
A
3434 }
3435 numBlocks -= kBitsPerWord;
3436 ++currentWord;
3437 --wordsLeft;
3438 }
39236c6e 3439
91447636
A
3440 /*
3441 * Test any remaining blocks.
3442 */
3443 if (numBlocks != 0) {
3444 bitMask = ~(kAllBitsSetInWord >> numBlocks);
3445 if (wordsLeft == 0) {
3446 /* Read in the next bitmap block */
3447 startingBlock += bitsPerBlock;
39236c6e 3448
91447636
A
3449 buffer = NULL;
3450 error = ReleaseBitmapBlock(hfsmp, blockRef, false);
3451 if (error) goto Exit;
3452
3453 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
3454 if (error) goto Exit;
3455
3456 currentWord = buffer;
3457 wordsLeft = wordsPerBlock;
3458 }
3459 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
6d2010ae
A
3460 if (stop_on_first) {
3461 blockCount = 1;
3462 goto Exit;
3463 }
3464 blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
91447636
A
3465 }
3466 }
3467Exit:
3468 if (buffer) {
3469 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
3470 }
6d2010ae
A
3471 if (allocCount) {
3472 *allocCount = blockCount;
3473 }
3474
3475JustReturn:
3476 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
3477 KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
3478
3479 return (error);
3480}
3481
3482/*
3483 * Count total number of blocks that are allocated in the given
3484 * range from the bitmap. This is used to preflight total blocks
3485 * that need to be relocated during volume resize.
3486 *
3487 * The journal or allocation file lock must be held.
3488 *
3489 * Returns:
3490 * 0 on success, non-zero on failure.
3491 * On failure, allocCount is zero.
3492 */
39236c6e 3493 int
6d2010ae
A
3494hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
3495 u_int32_t numBlocks, u_int32_t *allocCount)
3496{
3497 return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
3498}
3499
3500/*
3501 * Test to see if any blocks in a range are allocated.
3502 *
3503 * Note: On error, this function returns 1, which means that
3504 * one or more blocks in the range are allocated. This function
3505 * is primarily used for volume resize and we do not want
3506 * to report to the caller that the blocks are free when we
3507 * were not able to deterministically find it out. So on error,
3508 * we always report that the blocks are allocated.
3509 *
3510 * The journal or allocation file lock must be held.
3511 *
3512 * Returns
3513 * 0 if all blocks in the range are free.
3514 * 1 if blocks in the range are allocated, or there was an error.
3515 */
39236c6e 3516 int
6d2010ae
A
3517hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
3518{
3519 int error;
3520 u_int32_t allocCount;
3521
3522 error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
3523 if (error) {
3524 /* On error, we always say that the blocks are allocated
3525 * so that volume resize does not return false success.
3526 */
3527 return 1;
3528 } else {
3529 /* The function was deterministically able to find out
3530 * if there was any block allocated or not. In that case,
3531 * the value in allocCount is good enough to be returned
3532 * back to the caller.
3533 */
3534 return allocCount;
3535 }
3536}
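/*
 * Usage sketch (hypothetical caller, following the semantics documented
 * above): because errors are folded into the "allocated" answer, resize
 * code can treat any nonzero return as "do not treat this range as free".
 *
 *	if (hfs_isallocated(hfsmp, startBlock, numBlocks)) {
 *		// one or more blocks are (or must be assumed to be) in use;
 *		// relocate or skip this range rather than reclaiming it
 *	}
 */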
3537
3538/*
39236c6e 3539 * CONFIG_HFS_RBTREE
6d2010ae
A
3540 * Check to see if the red-black tree is live. Allocation file lock must be held
3541 * shared or exclusive to call this function. Note that we may call this even if
3542 * HFS is built without activating the red-black tree code.
3543 */
3544__private_extern__
3545int
3546hfs_isrbtree_active(struct hfsmount *hfsmp){
39236c6e
A
3547
3548#pragma unused (hfsmp)
3549
3550 /* Just return 0 for now */
3551 return 0;
3552}
3553
3554
3555
3556/* Summary Table Functions */
3557/*
3558 * hfs_check_summary:
3559 *
3560 * This function should be used to query the summary table to see if we can
3561 * bypass a bitmap block or not when we're trying to find a free allocation block.
3562 *
3563 *
3564 * Inputs:
3565 * allocblock - allocation block number. Will be used to infer the correct summary bit.
3566 * hfsmp -- filesystem in question.
3567 *
3568 * Output Arg:
 3569 * *freeblocks - set to 1 if we believe there is at least one free block in this vcbVBMIOSize
3570 * page of bitmap file.
3571 *
3572 *
3573 * Returns:
3574 * 0 on success
3575 * EINVAL on error
3576 *
3577 */
3578
3579static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) {
3580
3581 int err = EINVAL;
3582 if (hfsmp->vcbVBMIOSize) {
3583 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3584 uint32_t index;
3585 if (hfs_get_summary_index (hfsmp, allocblock, &index)) {
3586 *freeblocks = 0;
3587 return EINVAL;
3588 }
3589
3590 /* Ok, now that we have the bit index into the array, what byte is it in ? */
3591 uint32_t byteindex = index / kBitsPerByte;
3592 uint8_t current_byte = hfsmp->hfs_summary_table[byteindex];
3593 uint8_t bit_in_byte = index % kBitsPerByte;
3594
3595 if (current_byte & (1 << bit_in_byte)) {
3596 /*
3597 * We do not believe there is anything free in the
3598 * entire vcbVBMIOSize'd block.
3599 */
3600 *freeblocks = 0;
3601 }
3602 else {
3603 /* Looks like there might be a free block here... */
3604 *freeblocks = 1;
3605 }
3606 }
3607 err = 0;
6d2010ae 3608 }
39236c6e
A
3609
3610 return err;
3611}
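/*
 * Worked example of the indexing above (illustrative values only): for
 * summary bit index 19, byteindex = 19 / 8 = 2 and bit_in_byte = 19 % 8 = 3,
 * so the code reads hfs_summary_table[2] and tests mask (1 << 3) == 0x08.
 * A set bit means "assume this vcbVBMIOSize chunk of bitmap is fully
 * allocated"; a clear bit means it may contain free blocks.
 */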
3612
3613
3614#if 0
3615/*
3616 * hfs_get_next_summary
3617 *
3618 * From a given allocation block, jump to the allocation block at the start of the
3619 * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over
3620 * large swaths of bitmap once we have determined that the bitmap is relatively full.
3621 *
3622 * Inputs: hfsmount, starting allocation block number
3623 * Output Arg: *newblock will contain the allocation block number to start
3624 * querying.
3625 *
3626 * Returns:
3627 * 0 on success
3628 * EINVAL if the block argument is too large to be used, or the summary table not live.
3629 * EFBIG if there are no more summary bits to be queried
3630 */
3631static int
3632hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
3633
3634 u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3635 u_int32_t start_offset;
3636 u_int32_t next_offset;
3637 int err = EINVAL;
3638
3639 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3640 if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) {
3641 return err;
3642 }
3643
 3644 next_offset = ++start_offset; /* advance to the start of the next summary chunk */
3645
3646 if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) {
3647 /* Can't jump to the next summary bit. */
3648 return EINVAL;
3649 }
3650
3651 /* Otherwise, compute and return */
3652 *newblock = next_offset * bits_per_iosize;
3653 if (*newblock >= hfsmp->totalBlocks) {
3654 return EINVAL;
6d2010ae 3655 }
39236c6e 3656 err = 0;
6d2010ae 3657 }
39236c6e
A
3658
3659 return err;
3660}
3661
6d2010ae 3662#endif
39236c6e
A
3663
3664/*
3665 * hfs_release_summary
3666 *
3667 * Given an extent that is about to be de-allocated on-disk, determine the number
3668 * of summary bitmap bits that need to be marked as 'potentially available'.
3669 * Then go ahead and mark them as free.
3670 *
3671 * Inputs:
3672 * hfsmp - hfs mount
3673 * block - starting allocation block.
3674 * length - length of the extent.
3675 *
3676 * Returns:
3677 * EINVAL upon any errors.
3678 */
3679static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) {
3680 int err = EINVAL;
3681 uint32_t end_blk = (start_blk + length) - 1;
3682
3683 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3684 /* Figure out what the starting / ending block's summary bits are */
3685 uint32_t start_bit;
3686 uint32_t end_bit;
3687 uint32_t current_bit;
3688
3689 err = hfs_get_summary_index (hfsmp, start_blk, &start_bit);
3690 if (err) {
3691 goto release_err;
3692 }
3693 err = hfs_get_summary_index (hfsmp, end_blk, &end_bit);
3694 if (err) {
3695 goto release_err;
3696 }
3697
3698 if (ALLOC_DEBUG) {
3699 if (start_bit > end_bit) {
3700 panic ("HFS: start > end!, %d %d ", start_bit, end_bit);
3701 }
3702 }
3703 current_bit = start_bit;
3704 while (current_bit <= end_bit) {
3705 err = hfs_set_summary (hfsmp, current_bit, 0);
3706 current_bit++;
3707 }
3708 }
3709
3710release_err:
3711 return err;
3712}
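/*
 * Worked example (illustrative numbers, assuming a vcbVBMIOSize of 4096
 * bytes, i.e. 32768 allocation blocks per summary bit): freeing an extent
 * starting at allocation block 60000 with length 10000 gives end_blk =
 * 69999, so start_bit = 60000 / 32768 = 1 and end_bit = 69999 / 32768 = 2;
 * summary bits 1 and 2 are both cleared to mark those chunks as
 * potentially free.
 */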
3713
3714/*
3715 * hfs_find_summary_free
3716 *
 3717 * Given an allocation block as input, returns an allocation block number as output as a
3718 * suggestion for where to start scanning the bitmap in order to find free blocks. It will
3719 * determine the vcbVBMIOsize of the input allocation block, convert that into a summary
3720 * bit, then keep iterating over the summary bits in order to find the first free one.
3721 *
3722 * Inputs:
3723 * hfsmp - hfs mount
3724 * block - starting allocation block
3725 * newblock - output block as suggestion
3726 *
3727 * Returns:
3728 * 0 on success
3729 * ENOSPC if we could not find a free block
3730 */
3731
3732int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) {
3733
3734 int err = ENOSPC;
3735 uint32_t bit_index = 0;
3736 uint32_t maybe_has_blocks = 0;
3737
3738 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3739 uint32_t byte_index;
3740 uint8_t curbyte;
3741 uint8_t bit_in_byte;
3742 uint32_t summary_cap;
3743
3744 /*
3745 * We generate a cap for the summary search because the summary table
3746 * always represents a full summary of the bitmap FILE, which may
3747 * be way more bits than are necessary for the actual filesystem
3748 * whose allocations are mapped by the bitmap.
3749 *
3750 * Compute how much of hfs_summary_size is useable for the given number
3751 * of allocation blocks eligible on this FS.
3752 */
3753 err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap);
3754 if (err) {
3755 goto summary_exit;
3756 }
3757
3758 /* Check the starting block first */
3759 err = hfs_check_summary (hfsmp, block, &maybe_has_blocks);
3760 if (err) {
3761 goto summary_exit;
3762 }
3763
3764 if (maybe_has_blocks) {
3765 /*
3766 * It looks like the initial start block could have something.
3767 * Short-circuit and just use that.
3768 */
3769 *newblock = block;
3770 goto summary_exit;
3771 }
3772
3773 /*
3774 * OK, now we know that the first block was useless.
3775 * Get the starting summary bit, and find it in the array
3776 */
3777 maybe_has_blocks = 0;
3778 err = hfs_get_summary_index (hfsmp, block, &bit_index);
3779 if (err) {
3780 goto summary_exit;
3781 }
3782
3783 /* Iterate until we find something. */
3784 while (bit_index <= summary_cap) {
3785 byte_index = bit_index / kBitsPerByte;
3786 curbyte = hfsmp->hfs_summary_table[byte_index];
3787 bit_in_byte = bit_index % kBitsPerByte;
3788
3789 if (curbyte & (1 << bit_in_byte)) {
3790 /* nothing here. increment and move on */
3791 bit_index++;
3792 }
3793 else {
3794 /*
3795 * found something! convert bit_index back into
3796 * an allocation block for use. 'newblock' will now
3797 * contain the proper allocation block # based on the bit
3798 * index.
3799 */
3800 err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock);
3801 if (err) {
3802 goto summary_exit;
3803 }
3804 maybe_has_blocks = 1;
3805 break;
3806 }
3807 }
3808
3809 /* If our loop didn't find anything, set err to ENOSPC */
3810 if (maybe_has_blocks == 0) {
3811 err = ENOSPC;
3812 }
3813 }
3814
3815 /* If the summary table is not active for this mount, we'll just return ENOSPC */
3816summary_exit:
3817 if (maybe_has_blocks) {
3818 err = 0;
3819 }
3820
3821 return err;
3822}
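/*
 * Illustrative trace (hypothetical values): with 32768 allocation blocks
 * per summary bit, a request starting at block 40000 maps to summary bit
 * 40000 / 32768 = 1.  If the starting chunk is believed full, the loop
 * walks the summary bits from that index up to the cap derived from
 * allocLimit and, on the first clear bit, converts it back with
 * hfs_get_summary_allocblock() to produce the suggested starting block.
 */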
3823
3824/*
3825 * hfs_get_summary_allocblock
3826 *
3827 * Convert a summary bit into an allocation block number to use to start searching for free blocks.
3828 *
3829 * Inputs:
3830 * hfsmp - hfs mount
 3831 * summarybit - summary bit index
 3832 * *alloc - receives the allocation block number corresponding to the summary bit
 3833 *
 3834 * Returns:
 3835 * 0 on success
 3836 * EINVAL on failure
3837 */
3838int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t
3839 summarybit, uint32_t *alloc) {
3840 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3841 uint32_t allocblk;
3842
3843 allocblk = summarybit * bits_per_iosize;
3844
3845 if (allocblk >= hfsmp->totalBlocks) {
3846 return EINVAL;
3847 }
3848 else {
3849 *alloc = allocblk;
3850 }
3851
3852 return 0;
3853}
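/*
 * Hedged example of the conversion above (illustrative values): with a
 * vcbVBMIOSize of 4096 bytes, bits_per_iosize is 4096 * 8 = 32768, so
 * summary bit 0 maps to allocation block 0, bit 1 to block 32768, and
 * bit 3 to block 98304.  hfs_get_summary_index() below performs the
 * inverse division.  Any result at or past totalBlocks returns EINVAL.
 */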
3854
3855
3856/*
3857 * hfs_set_summary:
3858 *
3859 * This function should be used to manipulate the summary table
3860 *
3861 * The argument 'inuse' will set the value of the bit in question to one or zero
3862 * depending on its value.
3863 *
3864 * Inputs:
3865 * hfsmp - hfs mount
3866 * summarybit - the bit index into the summary table to set/unset.
3867 * inuse - the value to assign to the bit.
3868 *
3869 * Returns:
3870 * 0 on success
3871 * EINVAL on error
3872 *
3873 */
3874
3875static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) {
3876
3877 int err = EINVAL;
3878 if (hfsmp->vcbVBMIOSize) {
3879 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3880
3881 if (ALLOC_DEBUG) {
3882 if (hfsmp->hfs_summary_table == NULL) {
3883 panic ("hfs_set_summary: no table for %p ", hfsmp);
3884 }
3885 }
3886
3887 /* Ok, now that we have the bit index into the array, what byte is it in ? */
3888 uint32_t byte_index = summarybit / kBitsPerByte;
3889 uint8_t current_byte = hfsmp->hfs_summary_table[byte_index];
3890 uint8_t bit_in_byte = summarybit % kBitsPerByte;
3891
3892 if (inuse) {
3893 current_byte = (current_byte | (1 << bit_in_byte));
3894 }
3895 else {
3896 current_byte = (current_byte & ~(1 << bit_in_byte));
3897 }
3898
3899 hfsmp->hfs_summary_table[byte_index] = current_byte;
3900 }
3901 err = 0;
3902 }
3903
3904 return err;
3905}
3906
3907
3908/*
3909 * hfs_get_summary_index:
3910 *
3911 * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth
3912 * of IO against the bitmap file.
3913 *
3914 * Returns:
3915 * 0 on success
3916 * EINVAL on failure
3917 */
3918static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) {
3919 uint32_t summary_bit;
3920 uint32_t bits_per_iosize;
3921 int err = EINVAL;
3922
3923 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
3924 /* Is the input block bigger than the total number of blocks? */
3925 if (block >= hfsmp->totalBlocks) {
3926 return EINVAL;
3927 }
3928
3929 /* Is there even a vbmIOSize set? */
3930 if (hfsmp->vcbVBMIOSize == 0) {
3931 return EINVAL;
3932 }
3933
3934 bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
3935
3936 summary_bit = block / bits_per_iosize;
3937
3938 *index = summary_bit;
3939 err = 0;
3940 }
3941
3942 return err;
3943}
3944
3945/*
3946 * hfs_init_summary
3947 *
3948 * From a given mount structure, compute how big the summary table should be for the given
3949 * filesystem, then allocate and bzero the memory.
3950 *
3951 * Returns:
3952 * 0 on success
3953 * EINVAL on failure
3954 */
3955int
3956hfs_init_summary (struct hfsmount *hfsmp) {
3957
3958 uint32_t summary_size;
3959 uint32_t summary_size_bytes;
3960 uint8_t *summary_table;
3961
3962 if (hfsmp->hfs_allocation_cp == NULL) {
3963 if (ALLOC_DEBUG) {
3964 printf("hfs: summary table cannot progress without a bitmap cnode! \n");
3965 }
3966 return EINVAL;
3967 }
3968 /*
3969 * The practical maximum size of the summary table is 16KB:
3970 *
3971 * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte.
3972 *
3973 * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but
3974 * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes
3975 * supporting this much worse. The math would instead look like this:
3976 * (512MB / 2k) / 8 == 32k.
3977 *
3978 * So, we will disallow the summary table if the allocation block size is < 4k.
3979 */
3980
3981 if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) {
3982 printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize);
3983 return EINVAL;
3984 }
3985
3986 summary_size = hfsmp->hfs_allocation_cp->c_blocks;
3987
3988 if (ALLOC_DEBUG) {
3989 printf("HFS Summary Table Initialization: Bitmap %u blocks\n",
3990 hfsmp->hfs_allocation_cp->c_blocks);
3991 }
3992
3993 /*
 3994 * If the bitmap IO size is not the same as the allocation block size, then
 3995 * re-compute the number of summary bits necessary. Note that above, the
 3996 * default size is the number of allocation blocks in the bitmap *FILE*
3997 * (not the number of bits in the bitmap itself). If the allocation block size
3998 * is large enough though, we may need to increase this.
3999 */
4000 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4001 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4002 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4003
4004 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4005 summary_size = (uint32_t) lrg_size;
4006 }
4007
4008 /*
4009 * If the block size is the same as the IO Size, then the total number of blocks
4010 * is already equal to the number of IO units, which is our number of summary bits.
4011 */
4012
4013 summary_size_bytes = summary_size / kBitsPerByte;
4014 /* Always add one byte, just in case we have a dangling number of bits */
4015 summary_size_bytes++;
4016
4017 if (ALLOC_DEBUG) {
4018 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size);
4019 printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes);
4020 }
4021
4022 /* Store the field in the mount point, and then MALLOC/bzero the memory */
4023 hfsmp->hfs_summary_size = summary_size;
4024 hfsmp->hfs_summary_bytes = summary_size_bytes;
4025
4026 MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK);
4027 if (summary_table == NULL) {
4028 return ENOMEM;
4029 }
4030 bzero (summary_table, summary_size_bytes);
4031
4032 /* enable the summary table */
4033 hfsmp->hfs_flags |= HFS_SUMMARY_TABLE;
4034 hfsmp->hfs_summary_table = summary_table;
4035
4036 if (ALLOC_DEBUG) {
4037 if (hfsmp->hfs_summary_table == NULL) {
4038 panic ("HFS Summary Init: no table for %p\n", hfsmp);
4039 }
4040 }
4041 return 0;
4042}
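/*
 * Worked sizing example (illustrative figures only): if the bitmap file
 * occupies 16 allocation blocks of 64 KiB each while vcbVBMIOSize is
 * 4 KiB, the recomputation above yields lrg_size = 16 * 65536 / 4096 =
 * 256 summary bits, so summary_size_bytes = 256 / 8 + 1 = 33 bytes of
 * table.  When blockSize equals vcbVBMIOSize no recomputation is needed
 * and the bit count is simply the bitmap file's block count.
 */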
4043
4044/*
4045 * hfs_rebuild_summary
4046 *
4047 * This function should be used to allocate a new hunk of memory for use as a summary
4048 * table, then copy the existing data into it. We use it whenever the filesystem's size
4049 * changes. When a resize is in progress, you can still use the extant summary
4050 * table if it is active.
4051 *
4052 * Inputs:
4053 * hfsmp -- FS in question
4054 * newlength -- new length of the FS in allocation blocks.
4055 *
4056 * Outputs:
4057 * 0 on success, EINVAL on failure. If this function fails, the summary table
4058 * will be disabled for future use.
4059 *
4060 */
4061static int hfs_rebuild_summary (struct hfsmount *hfsmp) {
4062
4063 uint32_t new_summary_size;
4064
4065 new_summary_size = hfsmp->hfs_allocation_cp->c_blocks;
4066
4067
4068 if (ALLOC_DEBUG) {
4069 printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size);
4070 }
4071
4072 /*
4073 * If the bitmap IO size is not the same as the allocation block size, then re-compute
4074 * the number of summary bits necessary. Note that above, the default size is the number
4075 * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages).
4076 * If the allocation block size is large enough though, we may need to increase this, as
4077 * bitmap IO is capped at 64k per IO
4078 */
4079 if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) {
4080 uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize;
4081 lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize;
4082
4083 /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */
4084 new_summary_size = (uint32_t) lrg_size;
4085 }
4086
4087 /*
4088 * Ok, we have the new summary bitmap theoretical max size. See if it's the same as
4089 * what we've got already...
4090 */
4091 if (new_summary_size != hfsmp->hfs_summary_size) {
4092 uint32_t summarybytes = new_summary_size / kBitsPerByte;
4093 uint32_t copysize;
4094 uint8_t *newtable;
4095 /* Add one byte for slop */
4096 summarybytes++;
4097
4098 if (ALLOC_DEBUG) {
4099 printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size);
4100 printf("HFS Summary Table Size (in bytes) %d \n", summarybytes);
4101 }
4102
4103 /* Attempt to MALLOC the memory */
4104 MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK);
4105 if (newtable == NULL) {
4106 /*
4107 * ERROR! We need to disable the table now
4108 */
4109 FREE (hfsmp->hfs_summary_table, M_TEMP);
4110 hfsmp->hfs_summary_table = NULL;
4111 hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
4112 return EINVAL;
4113 }
4114 bzero (newtable, summarybytes);
4115
4116 /*
4117 * The new table may be smaller than the old one. If this is true, then
4118 * we can't copy the full size of the existing summary table into the new
4119 * one.
4120 *
4121 * The converse is not an issue since we bzeroed the table above.
4122 */
4123 copysize = hfsmp->hfs_summary_bytes;
4124 if (summarybytes < hfsmp->hfs_summary_bytes) {
4125 copysize = summarybytes;
4126 }
4127 memcpy (newtable, hfsmp->hfs_summary_table, copysize);
4128
4129 /* We're all good. Destroy the old copy and update ptrs */
4130 FREE (hfsmp->hfs_summary_table, M_TEMP);
4131
4132 hfsmp->hfs_summary_table = newtable;
4133 hfsmp->hfs_summary_size = new_summary_size;
4134 hfsmp->hfs_summary_bytes = summarybytes;
4135 }
4136
4137 return 0;
4138}
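/*
 * Illustrative note: on a shrink the new table can be smaller than the
 * old one, e.g. going from 33 bytes to 17 bytes copies only the first
 * 17 bytes of the old table; on a grow the extra tail stays zeroed,
 * which conservatively marks the newly covered chunks as "maybe free"
 * so the scanner will still examine them.
 */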
4139
4140
4141#if ALLOC_DEBUG
4142/*
4143 * hfs_validate_summary
4144 *
4145 * Validation routine for the summary table. Debug-only function.
4146 *
4147 * Bitmap lock must be held.
4148 *
4149 */
4150void hfs_validate_summary (struct hfsmount *hfsmp) {
4151 uint32_t i;
4152 int err;
4153
4154 /*
4155 * Iterate over all of the bits in the summary table, and verify if
 4156 * there really are free blocks in the pages that we believe may
 4157 * contain free blocks.
4158 */
4159
4160 if (hfsmp->hfs_summary_table == NULL) {
4161 panic ("HFS Summary: No HFS summary table!");
4162 }
4163
 4164 /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table; we add 1 byte for slop */
4165 if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) {
4166 panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
4167 }
4168
4169 if (hfsmp->vcbVBMIOSize == 0) {
4170 panic("HFS Summary: no VCB VBM IO Size !");
4171 }
4172
4173 printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN);
4174 printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks);
4175
4176
4177 /* iterate through all possible summary bits */
4178 for (i = 0; i < hfsmp->hfs_summary_size ; i++) {
4179
4180 uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte;
4181 uint32_t byte_offset = hfsmp->vcbVBMIOSize * i;
4182
4183 /* Compute the corresponding allocation block for the summary bit. */
4184 uint32_t alloc_block = i * bits_per_iosize;
4185
4186 /*
4187 * We use a uint32_t pointer here because it will speed up
4188 * access to the real bitmap data on disk.
4189 */
4190 uint32_t *block_data;
4191 struct buf *bp;
4192 int counter;
4193 int counter_max;
4194 int saw_free_bits = 0;
4195
4196 /* Get the block */
4197 if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) {
4198 panic ("HFS Summary: error (%d) in ReadBitmapRange!", err);
4199 }
4200
4201 /* Query the status of the bit and then make sure we match */
4202 uint32_t maybe_has_free_blocks;
4203 err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks);
4204 if (err) {
4205 panic ("HFS Summary: hfs_check_summary returned error (%d) ", err);
4206 }
4207 counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord;
4208
4209 for (counter = 0; counter < counter_max; counter++) {
4210 uint32_t word = block_data[counter];
4211
4212 /* We assume that we'll not find any free bits here. */
4213 if (word != kAllBitsSetInWord) {
4214 if (maybe_has_free_blocks) {
4215 /* All done */
4216 saw_free_bits = 1;
4217 break;
4218 }
4219 else {
4220 panic ("HFS Summary: hfs_check_summary saw free bits!");
4221 }
4222 }
4223 }
4224
4225 if (maybe_has_free_blocks && (saw_free_bits == 0)) {
4226 panic ("HFS Summary: did not see free bits !");
4227 }
4228
4229 /* Release the block. */
4230 if ((err = ReleaseScanBitmapRange (bp))) {
4231 panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err);
4232 }
4233 }
4234
4235 printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN);
4236
4237 return;
4238}
4239#endif
4240
4241/*
4242 * hfs_alloc_scan_range:
4243 *
4244 * This function should be used to scan large ranges of the allocation bitmap
4245 * at one time. It makes two key assumptions:
4246 *
4247 * 1) Bitmap lock is held during the duration of the call (exclusive)
4248 * 2) There are no pages in the buffer cache for any of the bitmap
4249 * blocks that we may encounter. It *MUST* be completely empty.
4250 *
4251 * The expected use case is when we are scanning the bitmap in full while we are
4252 * still mounting the filesystem in order to issue TRIMs or build up the summary
4253 * table for the mount point. It should be done after any potential journal replays
4254 * are completed and their I/Os fully issued.
4255 *
4256 * The key reason for assumption (2) above is that this function will try to issue
 4257 * I/O against the bitmap file in chunks as large as possible -- essentially as
 4258 * much as the buffer layer will handle (1MB). Because the size of these I/Os
 4259 * is larger than what would be expected during normal runtime, we must invalidate
4260 * the buffers as soon as we are done with them so that they do not persist in
4261 * the buffer cache for other threads to find, as they'll typically be doing
4262 * allocation-block size I/Os instead.
4263 *
4264 * Input Args:
4265 * hfsmp - hfs mount data structure
4266 * startbit - allocation block # to start our scan. It must be aligned
4267 * on a vcbVBMIOsize boundary.
4268 * list - journal trim list data structure for issuing TRIMs
4269 *
4270 * Output Args:
4271 * bitToScan - Return the next bit to scan if this function is called again.
4272 * Caller will supply this into the next invocation
4273 * of this call as 'startbit'.
4274 */
4275
4276static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit,
4277 u_int32_t *bitToScan, struct jnl_trim_list *list) {
4278
4279 int error;
4280 int readwrite = 1;
4281 u_int32_t curAllocBlock;
4282 struct buf *blockRef = NULL;
4283 u_int32_t *buffer = NULL;
4284 u_int32_t free_offset = 0; //tracks the start of the current free range
4285 u_int32_t size = 0; // tracks the length of the current free range.
4286 u_int32_t iosize = 0; //how much io we should generate against the bitmap
4287 u_int32_t byte_off; // byte offset into the bitmap file.
4288 u_int32_t completed_size; // how much io was actually completed
4289 u_int32_t last_bitmap_block;
4290 u_int32_t current_word;
4291 u_int32_t word_index = 0;
4292
4293 /* summary table building */
4294 uint32_t summary_bit = 0;
4295 uint32_t saw_free_blocks = 0;
4296 uint32_t last_marked = 0;
4297
4298 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
4299 readwrite = 0;
4300 }
4301
4302 /*
4303 * Compute how much I/O we should generate here.
4304 * hfs_scan_range_size will validate that the start bit
4305 * converted into a byte offset into the bitmap file,
4306 * is aligned on a VBMIOSize boundary.
4307 */
4308 error = hfs_scan_range_size (hfsmp, startbit, &iosize);
4309 if (error) {
4310 if (ALLOC_DEBUG) {
4311 panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error);
4312 }
4313 return error;
4314 }
4315
4316 if (iosize < hfsmp->vcbVBMIOSize) {
4317 if (ALLOC_DEBUG) {
4318 panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize);
4319 }
4320 return EINVAL;
4321 }
4322
4323 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
4324 byte_off = startbit / kBitsPerByte;
4325
4326 /*
4327 * When the journal replays blocks, it does so by writing directly to the disk
4328 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
4329 * it also immediately re-reads and invalidates the range covered by the bp so
4330 * it does not leave anything lingering in the cache (for iosize reasons).
4331 *
4332 * As such, it is safe to do large I/Os here with ReadBitmapRange.
4333 *
4334 * NOTE: It is not recommended, but it is possible to call the function below
4335 * on sections of the bitmap that may be in core already as long as the pages are not
4336 * dirty. In that case, we'd notice that something starting at that
4337 * logical block of the bitmap exists in the metadata cache, and we'd check
4338 * if the iosize requested is the same as what was already allocated for it.
4339 * Odds are pretty good we're going to request something larger. In that case,
4340 * we just free the existing memory associated with the buf and reallocate a
4341 * larger range. This function should immediately invalidate it as soon as we're
4342 * done scanning, so this shouldn't cause any coherency issues.
4343 */
4344
4345 error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef);
4346 if (error) {
4347 if (ALLOC_DEBUG) {
4348 panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error);
4349 }
4350 return error;
4351 }
4352
4353 /*
4354 * At this point, we have a giant wired buffer that represents some portion of
4355 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
4356 * bytes though, so clip our ending bit to what we actually read in.
4357 */
4358 completed_size = buf_count(blockRef);
4359 last_bitmap_block = completed_size * kBitsPerByte;
4360 last_bitmap_block = last_bitmap_block + startbit;
4361
4362 /* Cap the last block to the total number of blocks if required */
4363 if (last_bitmap_block > hfsmp->totalBlocks) {
4364 last_bitmap_block = hfsmp->totalBlocks;
4365 }
4366
4367 /* curAllocBlock represents the logical block we're analyzing. */
4368 curAllocBlock = startbit;
4369 word_index = 0;
4370 size = 0;
4371
4372 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4373 if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) {
4374 error = EINVAL;
4375 if (ALLOC_DEBUG) {
4376 panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit);
4377 }
4378 return error;
4379 }
4380 /*
4381 * summary_bit should now be set to the summary bit corresponding to
4382 * the allocation block of the first bit that we're supposed to scan
4383 */
4384 }
4385 saw_free_blocks = 0;
4386
4387 while (curAllocBlock < last_bitmap_block) {
4388 u_int32_t bit;
4389
4390 /* Update the summary table as needed */
4391 if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
4392 if (ALLOC_DEBUG) {
4393 if (hfsmp->hfs_summary_table == NULL) {
4394 panic ("hfs_alloc_scan_range: no summary table!");
4395 }
4396 }
4397
4398 uint32_t temp_summary;
4399 error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary);
4400 if (error) {
4401 if (ALLOC_DEBUG) {
4402 panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock);
4403 }
4404 return EINVAL;
4405 }
4406
4407 if (ALLOC_DEBUG) {
4408 if (temp_summary < summary_bit) {
4409 panic ("hfs_alloc_scan_range: backwards summary bit?\n");
4410 }
4411 }
4412
4413 /*
4414 * If temp_summary is greater than summary_bit, then this
4415 * means that the next allocation block crosses a vcbVBMIOSize boundary
4416 * and we should treat this range of on-disk data as part of a new summary
4417 * bit.
4418 */
4419 if (temp_summary > summary_bit) {
4420 if (saw_free_blocks == 0) {
4421 /* Mark the bit as totally consumed in the summary table */
4422 hfs_set_summary (hfsmp, summary_bit, 1);
4423 }
4424 else {
4425 /* Mark the bit as potentially free in summary table */
4426 hfs_set_summary (hfsmp, summary_bit, 0);
4427 }
4428 last_marked = summary_bit;
4429 /*
4430 * Any time we set the summary table, update our counter which tracks
 4431 * the last bit that was fully marked in the summary table.
4432 *
4433 * Then reset our marker which says we haven't seen a free bit yet.
4434 */
4435 saw_free_blocks = 0;
4436 summary_bit = temp_summary;
4437 }
4438 } /* End summary table conditions */
4439
4440 current_word = SWAP_BE32(buffer[word_index]);
4441 /* Iterate through the word 1 bit at a time... */
4442 for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) {
4443 if (curAllocBlock >= last_bitmap_block) {
4444 break;
4445 }
4446 u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit));
4447
4448 if (allocated) {
4449 if (size != 0) {
4450 if (readwrite) {
4451 /* Insert the previously tracked range of free blocks to the trim list */
4452 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4453 }
4454 add_free_extent_cache (hfsmp, free_offset, size);
4455 size = 0;
4456 free_offset = 0;
4457 }
4458 }
4459 else {
4460 /* Not allocated */
4461 size++;
4462 if (free_offset == 0) {
 4463 /* Start a new run of free space at curAllocBlock */
4464 free_offset = curAllocBlock;
4465 }
4466 if (saw_free_blocks == 0) {
4467 saw_free_blocks = 1;
4468 }
4469 }
4470 } /* end for loop iterating through the word */
4471
4472 if (curAllocBlock < last_bitmap_block) {
4473 word_index++;
4474 }
4475
4476 } /* End while loop (iterates through last_bitmap_block) */
4477
4478
4479 /*
4480 * We've (potentially) completed our pass through this region of bitmap,
4481 * but one thing we may not have done is updated that last summary bit for
4482 * the last page we scanned, because we would have never transitioned across
4483 * a vcbVBMIOSize boundary again. Check for that and update the last bit
4484 * as needed.
4485 *
4486 * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap
4487 * for the region of bitmap on-disk that we were scanning. (it is one greater).
4488 */
4489 if ((curAllocBlock >= last_bitmap_block) &&
4490 (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) {
4491 uint32_t temp_summary;
4492 /* temp_block should be INSIDE the region we just scanned, so subtract 1 */
4493 uint32_t temp_block = last_bitmap_block - 1;
4494 error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary);
4495 if (error) {
4496 if (ALLOC_DEBUG) {
4497 panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block);
4498 }
4499 return EINVAL;
4500 }
4501
4502 /* Did we already update this in the table? */
4503 if (temp_summary > last_marked) {
4504 if (saw_free_blocks == 0) {
4505 hfs_set_summary (hfsmp, temp_summary, 1);
4506 }
4507 else {
4508 hfs_set_summary (hfsmp, temp_summary, 0);
4509 }
4510 }
4511 }
4512
4513 /*
4514 * We may have been tracking a range of free blocks that hasn't been inserted yet.
4515 * Keep the logic for the TRIM and free extent separate from that of the summary
4516 * table management even though they are closely linked.
4517 */
4518 if (size != 0) {
4519 if (readwrite) {
4520 hfs_track_unmap_blocks (hfsmp, free_offset, size, list);
4521 }
4522 add_free_extent_cache (hfsmp, free_offset, size);
4523 }
4524
4525 /*
4526 * curAllocBlock represents the next block we need to scan when we return
4527 * to this function.
4528 */
4529 *bitToScan = curAllocBlock;
4530 ReleaseScanBitmapRange(blockRef);
4531
6d2010ae 4532 return 0;
39236c6e 4533
6d2010ae
A
4534}
4535
316670eb 4536
39236c6e
A
4537
4538/*
4539 * Compute the maximum I/O size to generate against the bitmap file
4540 * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap.
316670eb 4541 *
39236c6e
A
4542 * Inputs:
4543 * hfsmp -- hfsmount to look at
4544 * bitmap_off -- bit offset into the bitmap file
4545 *
4546 * Outputs:
4547 * iosize -- iosize to generate.
6d2010ae 4548 *
316670eb 4549 * Returns:
39236c6e 4550 * 0 on success; EINVAL otherwise
6d2010ae 4551 */
39236c6e 4552static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) {
6d2010ae 4553
6d2010ae 4554 /*
39236c6e
A
4555 * The maximum bitmap size is 512MB regardless of ABN size, so we can get away
4556 * with 32 bit math in this function.
4557 */
4558
4559 uint32_t bitmap_len;
4560 uint32_t remaining_bitmap;
4561 uint32_t target_iosize;
4562 uint32_t bitmap_off;
4563
4564 /* Is this bit index not word aligned? If so, immediately fail. */
4565 if (bitmap_st % kBitsPerWord) {
4566 if (ALLOC_DEBUG) {
4567 panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st);
4568 }
4569 return EINVAL;
6d2010ae 4570 }
39236c6e
A
4571
4572 /* bitmap_off is in bytes, not allocation blocks/bits */
4573 bitmap_off = bitmap_st / kBitsPerByte;
4574
4575 if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) {
4576 if (ALLOC_DEBUG) {
4577 panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off);
6d2010ae 4578 }
39236c6e 4579 return EINVAL;
6d2010ae 4580 }
39236c6e
A
4581
4582 /*
4583 * Also invalid if it's not at least aligned to HFS bitmap logical
4584 * block boundaries. We don't have to emit an iosize that's an
4585 * exact multiple of the VBMIOSize, but it must start on such
4586 * a boundary.
4587 *
4588 * The vcbVBMIOSize may be SMALLER than the allocation block size
4589 * on a FS with giant allocation blocks, but it will never be
4590 * greater than it, so it should be safe to start I/O
4591 * aligned on a VBMIOsize boundary.
4592 */
4593 if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) {
4594 if (ALLOC_DEBUG) {
4595 panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off);
4596 }
4597 return EINVAL;
6d2010ae 4598 }
39236c6e 4599
6d2010ae 4600 /*
39236c6e
A
4601 * Generate the total bitmap file length in bytes, then round up
4602 * that value to the end of the last allocation block, if needed (It
4603 * will probably be needed). We won't scan past the last actual
4604 * allocation block.
4605 *
4606 * Unless we're completing the bitmap scan (or bitmap < 1MB), we
4607 * have to complete the I/O on VBMIOSize boundaries, but we can only read
4608 * up until the end of the bitmap file.
6d2010ae 4609 */
39236c6e
A
4610 bitmap_len = hfsmp->totalBlocks / kBitsPerByte;
4611 if (bitmap_len % (hfsmp->blockSize)) {
4612 bitmap_len = (bitmap_len / hfsmp->blockSize);
4613 /* round up to the end of the next alloc block */
4614 bitmap_len++;
4615
4616 /* Convert the # of alloc blocks back to bytes. */
4617 bitmap_len = bitmap_len * hfsmp->blockSize;
4618 }
4619
4620 remaining_bitmap = bitmap_len - bitmap_off;
4621
4622 /*
4623 * io size is the MIN of the maximum I/O we can generate or the
4624 * remaining amount of bitmap.
4625 */
4626 target_iosize = MIN((MAXBSIZE), remaining_bitmap);
4627 *iosize = target_iosize;
4628
6d2010ae
A
4629 return 0;
4630}
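/*
 * Worked example (illustrative figures, assuming the 1 MiB buffer-layer
 * cap mentioned in hfs_alloc_scan_range above): a volume with 5,242,880
 * allocation blocks needs 655,360 bytes of bitmap.  With a 4 KiB
 * allocation block size that length is already block-aligned, so a call
 * with bitmap_st == 0 returns iosize = MIN(1 MiB, 655,360) = 655,360,
 * and a later call at byte offset 524,288 returns the remaining 131,072.
 */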
4631
316670eb 4632
39236c6e
A
4633
4634
316670eb
A
4635/*
4636 * This function is basically the same as hfs_isallocated, except it's designed for
4637 * use with the red-black tree validation code. It assumes we're only checking whether
4638 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
4639 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
4640 *
4641 * This should not be called in general purpose scanning code.
4642 */
4643int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
39236c6e 4644
316670eb
A
4645 u_int32_t *currentWord; // Pointer to current word within bitmap block
4646 u_int32_t bitMask; // Word with given bits already set (ready to test)
 4647 u_int32_t firstBit; // Bit index within word of first bit to check
 4648 u_int32_t numBits; // Number of bits in word to check
4649 u_int32_t bitsPerBlock;
4650 uintptr_t blockRef;
4651 u_int32_t wordsPerBlock;
4652 u_int32_t numBlocks = 1;
4653 u_int32_t *buffer = NULL;
4654
4655 int inuse = 0;
4656 int error;
39236c6e
A
4657
4658
316670eb
A
4659 if (bp_buf) {
4660 /* just use passed-in buffer if avail. */
4661 buffer = bp_buf;
4662 }
4663 else {
4664 /*
4665 * Pre-read the bitmap block containing the first word of allocation
4666 */
4667 error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
4668 if (error)
4669 return (error);
4670 }
39236c6e 4671
316670eb
A
4672 /*
4673 * Initialize currentWord, and wordsLeft.
4674 */
4675 u_int32_t wordIndexInBlock;
39236c6e 4676
316670eb
A
4677 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
4678 wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
39236c6e 4679
316670eb
A
4680 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
4681 currentWord = buffer + wordIndexInBlock;
39236c6e 4682
316670eb
A
4683 /*
4684 * First test any non word aligned bits.
4685 */
4686 firstBit = startingBlock % kBitsPerWord;
4687 bitMask = kAllBitsSetInWord >> firstBit;
4688 numBits = kBitsPerWord - firstBit;
4689 if (numBits > numBlocks) {
4690 numBits = numBlocks;
4691 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
4692 }
4693 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
4694 inuse = 1;
4695 goto Exit;
4696 }
4697 numBlocks -= numBits;
4698 ++currentWord;
39236c6e 4699
316670eb
A
4700Exit:
4701 if(bp_buf == NULL) {
4702 if (buffer) {
4703 (void)ReleaseBitmapBlock(hfsmp, blockRef, false);
4704 }
4705 }
4706 return (inuse);
6d2010ae
A
4707
4708
6d2010ae 4709
91447636
A
4710}
4711
6d2010ae
A
4712/*
4713 * This function resets all of the data structures relevant to the
4714 * free extent cache stored in the hfsmount struct.
4715 *
4716 * If we are using the red-black tree code then we need to account for the fact that
4717 * we may encounter situations where we need to jettison the tree. If that is the
4718 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
4719 * the free ext cache is zeroed before we start using it.
d1ecb069 4720 *
6d2010ae
A
4721 * We also reset and disable the cache when allocLimit is updated... which
4722 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
4723 * It is independent of the type of allocator being used currently.
d1ecb069 4724 */
6d2010ae 4725void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
d1ecb069 4726{
6d2010ae
A
4727 int bytes;
4728 void *freeExt;
1c79356b 4729
6d2010ae
A
4730 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4731 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
4732
4733 lck_spin_lock(&hfsmp->vcbFreeExtLock);
39236c6e 4734
6d2010ae
A
4735 /* reset Free Extent Count */
4736 hfsmp->vcbFreeExtCnt = 0;
39236c6e 4737
6d2010ae
A
4738 /* reset the actual array */
4739 bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
4740 freeExt = (void*)(hfsmp->vcbFreeExt);
39236c6e 4741
6d2010ae 4742 bzero (freeExt, bytes);
39236c6e 4743
6d2010ae
A
4744 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
4745
4746 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4747 KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
4748
4749 return;
4750}
4751
4752/*
4753 * This function is used to inform the allocator if we have to effectively shrink
4754 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
4755 *
4756 * The bitmap lock must be held when calling this function. This function also modifies the
4757 * allocLimit field in the hfs mount point structure in the general case.
4758 *
4759 * In the shrinking case, we'll have to remove all free extents from the red-black
4760 * tree past the specified offset new_end_block. In the growth case, we'll have to force
4761 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
4762 *
4763 * new_end_block represents the total number of blocks available for allocation in the resized
4764 * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it
 4765 * will be out of the (0, n-1) range that is indexable in the bitmap.
4766 *
4767 * Returns 0 on success
4768 * errno on failure
4769 */
4770__private_extern__
4771u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
39236c6e 4772
6d2010ae 4773 /*
39236c6e 4774 * Update allocLimit to the argument specified
6d2010ae
A
4775 */
4776 hfsmp->allocLimit = new_end_block;
4777
4778 /* Invalidate the free extent cache completely so that
4779 * it does not have any extents beyond end of current
4780 * volume.
4781 */
4782 ResetVCBFreeExtCache(hfsmp);
4783
39236c6e
A
4784 /* Force a rebuild of the summary table. */
4785 (void) hfs_rebuild_summary (hfsmp);
6d2010ae
A
4786
4787 return 0;
4788
4789}
4790
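/*
 * A minimal sketch of how a resize path might drive UpdateAllocLimit(); the
 * wrapper name and argument below are hypothetical -- the real callers are
 * hfs_truncatefs() and hfs_extendfs() -- and the bitmap lock must already be
 * held, as documented above.
 */
static u_int32_t
example_shrink_alloc_limit(struct hfsmount *hfsmp, u_int32_t new_total_blocks)
{
	/*
	 * After this call, blocks at or beyond new_total_blocks are no longer
	 * handed out: the free extent cache has been emptied and the summary
	 * table rebuilt to match the new limit.
	 */
	return UpdateAllocLimit(hfsmp, new_total_blocks);
}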
4791
316670eb
A
4792/*
4793 * Remove an extent from the list of free extents.
4794 *
4795 * This is a low-level routine. It does not handle overlaps or splitting;
 4796 * that is the responsibility of the caller. The caller identifies the
 4797 * extent by its index in the free extent array; that entry is removed, and
 4798 * any following extents in the list will be shifted up.
4799 *
4800 * Inputs:
 4801 * hfsmp - mount point structure
 4802 * index - index of the extent to remove from the free extent array
 4803 *
 4804 * Result:
 4805 * None; the entry at the given index is removed from the list.
4806 */
4807static void remove_free_extent_list(struct hfsmount *hfsmp, int index)
4808{
4809 if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) {
4810 if (ALLOC_DEBUG)
4811 panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4812 else
4813 printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt);
4814 return;
4815 }
4816 int shift_count = hfsmp->vcbFreeExtCnt - index - 1;
4817 if (shift_count > 0) {
4818 memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4819 }
4820 hfsmp->vcbFreeExtCnt--;
4821}
4822
4823
4824/*
4825 * Add an extent to the list of free extents.
4826 *
4827 * This is a low-level routine. It does not handle overlaps or coalescing;
4828 * that is the responsibility of the caller. This routine *does* make
4829 * sure that the extent it is adding is inserted in the correct location.
4830 * If the list is full, this routine will handle either removing the last
4831 * extent in the list to make room for the new extent, or ignoring the
4832 * new extent if it is "worse" than the last extent in the list.
4833 *
4834 * Inputs:
4835 * startBlock - Start of extent to add
4836 * blockCount - Number of blocks in extent to add
4837 *
4838 * Result:
 4839 * The index where the extent was inserted, or kMaxFreeExtents
4840 * if the extent was not inserted (the list was full, and the extent
4841 * being added was "worse" than everything in the list).
4842 */
4843static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4844{
4845 uint32_t i;
39236c6e 4846
316670eb
A
4847 /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */
4848 if (ALLOC_DEBUG) {
4849 uint32_t endBlock = startBlock + blockCount;
4850 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4851 if (endBlock < hfsmp->vcbFreeExt[i].startBlock ||
39236c6e
A
4852 startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) {
4853 continue;
316670eb
A
4854 }
4855 panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d",
39236c6e 4856 hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i);
316670eb
A
4857 }
4858 }
4859
4860 /* Figure out what index the new extent should be inserted at. */
4861 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4862 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
4863 /* The list is sorted by increasing offset. */
4864 if (startBlock < hfsmp->vcbFreeExt[i].startBlock) {
4865 break;
4866 }
4867 } else {
4868 /* The list is sorted by decreasing size. */
4869 if (blockCount > hfsmp->vcbFreeExt[i].blockCount) {
4870 break;
4871 }
4872 }
4873 }
39236c6e 4874
316670eb
A
4875 /* When we get here, i is the index where the extent should be inserted. */
4876 if (i == kMaxFreeExtents) {
4877 /*
4878 * The new extent is worse than anything already in the list,
4879 * and the list is full, so just ignore the extent to be added.
4880 */
4881 return i;
4882 }
39236c6e 4883
316670eb
A
4884 /*
4885 * Grow the list (if possible) to make room for an insert.
4886 */
4887 if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents)
4888 hfsmp->vcbFreeExtCnt++;
39236c6e 4889
316670eb
A
4890 /*
4891 * If we'll be keeping any extents after the insert position, then shift them.
4892 */
4893 int shift_count = hfsmp->vcbFreeExtCnt - i - 1;
4894 if (shift_count > 0) {
4895 memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0]));
4896 }
39236c6e 4897
316670eb
A
4898 /* Finally, store the new extent at its correct position. */
4899 hfsmp->vcbFreeExt[i].startBlock = startBlock;
4900 hfsmp->vcbFreeExt[i].blockCount = blockCount;
4901 return i;
4902}
4903
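/*
 * A minimal sketch of how the two low-level list routines above fit together.
 * The extents and the starting state are hypothetical, and this assumes a
 * non-sparse volume (list sorted by decreasing blockCount).  As with the
 * cache-level routines below, the caller must hold vcbFreeExtLock.
 */
static void
example_free_extent_list_usage(struct hfsmount *hfsmp)
{
	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* The 200-block extent sorts ahead of the 50-block one, regardless of
	 * the order in which they are added. */
	int big   = add_free_extent_list(hfsmp, 1000, 200);
	int small = add_free_extent_list(hfsmp, 5000, 50);

	/* Remove the hypothetical entries again.  A return of kMaxFreeExtents
	 * means the add was dropped because the list was full; removing the
	 * later (smaller) entry first keeps the earlier index valid. */
	if (small < kMaxFreeExtents)
		remove_free_extent_list(hfsmp, small);
	if (big < kMaxFreeExtents)
		remove_free_extent_list(hfsmp, big);

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}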
4904
6d2010ae
A
4905/*
4906 * Remove an entry from free extent cache after it has been allocated.
4907 *
316670eb
A
4908 * This is a high-level routine. It handles removing a portion of a
4909 * cached extent, potentially splitting it into two (if the cache was
4910 * already full, throwing away the extent that would sort last). It
4911 * also handles removing an extent that overlaps multiple extents in
4912 * the cache.
6d2010ae
A
4913 *
4914 * Inputs:
316670eb
A
4915 * hfsmp - mount point structure
4916 * startBlock - starting block of the extent to be removed.
4917 * blockCount - number of blocks of the extent to be removed.
6d2010ae
A
4918 */
4919static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
4920{
316670eb
A
4921 u_int32_t i, insertedIndex;
4922 u_int32_t currentStart, currentEnd, endBlock;
6d2010ae 4923 int extentsRemoved = 0;
39236c6e 4924
6d2010ae
A
4925 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
4926 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
39236c6e 4927
316670eb 4928 endBlock = startBlock + blockCount;
39236c6e 4929
6d2010ae 4930 lck_spin_lock(&hfsmp->vcbFreeExtLock);
39236c6e 4931
316670eb
A
4932 /*
4933 * Iterate over all of the extents in the free extent cache, removing or
4934 * updating any entries that overlap with the input extent.
4935 */
4936 for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) {
4937 currentStart = hfsmp->vcbFreeExt[i].startBlock;
4938 currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount;
39236c6e 4939
316670eb
A
4940 /*
4941 * If the current extent is entirely before or entirely after the
 4942 * extent to be removed, then we keep it as-is.
6d2010ae 4943 */
316670eb
A
4944 if (currentEnd <= startBlock || currentStart >= endBlock) {
4945 continue;
4946 }
39236c6e 4947
316670eb
A
4948 /*
4949 * If the extent being removed entirely contains the current extent,
4950 * then remove the current extent.
4951 */
4952 if (startBlock <= currentStart && endBlock >= currentEnd) {
4953 remove_free_extent_list(hfsmp, i);
39236c6e 4954
316670eb
A
4955 /*
4956 * We just removed the extent at index i. The extent at
4957 * index i+1 just got shifted to index i. So decrement i
4958 * to undo the loop's "++i", and the next iteration will
4959 * examine index i again, which contains the next extent
4960 * in the list.
6d2010ae 4961 */
316670eb
A
4962 --i;
4963 ++extentsRemoved;
4964 continue;
6d2010ae 4965 }
39236c6e 4966
316670eb
A
4967 /*
4968 * If the extent being removed is strictly "in the middle" of the
4969 * current extent, then we need to split the current extent into
4970 * two discontiguous extents (the "head" and "tail"). The good
4971 * news is that we don't need to examine any other extents in
4972 * the list.
6d2010ae 4973 */
316670eb
A
4974 if (startBlock > currentStart && endBlock < currentEnd) {
4975 remove_free_extent_list(hfsmp, i);
4976 add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4977 add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
4978 break;
4979 }
39236c6e 4980
316670eb
A
4981 /*
4982 * The only remaining possibility is that the extent to be removed
4983 * overlaps the start or end (but not both!) of the current extent.
4984 * So we need to replace the current extent with a shorter one.
4985 *
4986 * The only tricky part is that the updated extent might be at a
4987 * different index than the original extent. If the updated extent
4988 * was inserted after the current extent, then we need to re-examine
4989 * the entry at index i, since it now contains the extent that was
4990 * previously at index i+1. If the updated extent was inserted
4991 * before or at the same index as the removed extent, then the
4992 * following extents haven't changed position.
4993 */
4994 remove_free_extent_list(hfsmp, i);
4995 if (startBlock > currentStart) {
4996 /* Remove the tail of the current extent. */
4997 insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart);
4998 } else {
4999 /* Remove the head of the current extent. */
5000 insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock);
5001 }
5002 if (insertedIndex > i) {
5003 --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */
5004 }
6d2010ae 5005 }
39236c6e 5006
6d2010ae 5007 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
39236c6e 5008
6d2010ae 5009 sanity_check_free_ext(hfsmp, 0);
39236c6e 5010
6d2010ae
A
5011 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5012 KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
39236c6e 5013
d1ecb069
A
5014 return;
5015}
5016
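/*
 * A hypothetical before/after picture of the "strictly in the middle" case
 * handled above.  The block numbers are invented; only
 * remove_free_extent_cache() itself is taken from this file.
 */
static void
example_remove_middle_of_cached_extent(struct hfsmount *hfsmp)
{
	/*
	 * Suppose the cache currently holds a single free extent covering
	 * allocation blocks 100..699 (startBlock 100, blockCount 600).
	 */

	/* Allocating blocks 300..399 and telling the cache about it ... */
	remove_free_extent_cache(hfsmp, 300, 100);

	/*
	 * ... leaves two discontiguous entries behind (assuming the cache had
	 * room to keep both pieces):
	 *     head: startBlock 100, blockCount 200   (blocks 100..299)
	 *     tail: startBlock 400, blockCount 300   (blocks 400..699)
	 */
}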
316670eb 5017
6d2010ae 5018/*
316670eb 5019 * Add an entry to free extent cache after it has been deallocated.
d1ecb069 5020 *
316670eb
A
5021 * This is a high-level routine. It will merge overlapping or contiguous
5022 * extents into a single, larger extent.
5023 *
5024 * If the extent provided has blocks beyond current allocLimit, it is
5025 * clipped to allocLimit (so that we won't accidentally find and allocate
5026 * space beyond allocLimit).
d1ecb069 5027 *
6d2010ae 5028 * Inputs:
316670eb
A
5029 * hfsmp - mount point structure
 5030 * startBlock - starting block of the extent to be added.
 5031 * blockCount - number of blocks of the extent to be added.
6d2010ae
A
5032 *
5033 * Returns:
316670eb
A
5034 * true - if the extent was added successfully to the list
5035 * false - if the extent was not added to the list, maybe because
 5036 * the extent was beyond allocLimit, or is not the best
5037 * candidate to be put in the cache.
d1ecb069 5038 */
316670eb 5039static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
d1ecb069 5040{
6d2010ae 5041 Boolean retval = false;
316670eb
A
5042 uint32_t endBlock;
5043 uint32_t currentEnd;
5044 uint32_t i;
39236c6e 5045
6d2010ae
A
5046 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5047 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
39236c6e 5048
6d2010ae
A
5049 /* No need to add extent that is beyond current allocLimit */
5050 if (startBlock >= hfsmp->allocLimit) {
5051 goto out_not_locked;
5052 }
39236c6e 5053
6d2010ae
A
5054 /* If end of the free extent is beyond current allocLimit, clip the extent */
5055 if ((startBlock + blockCount) > hfsmp->allocLimit) {
5056 blockCount = hfsmp->allocLimit - startBlock;
5057 }
39236c6e 5058
6d2010ae 5059 lck_spin_lock(&hfsmp->vcbFreeExtLock);
39236c6e 5060
316670eb
A
5061 /*
5062 * Make a pass through the free extent cache, looking for known extents that
5063 * overlap or are contiguous with the extent to be added. We'll remove those
5064 * extents from the cache, and incorporate them into the new extent to be added.
6d2010ae 5065 */
316670eb
A
5066 endBlock = startBlock + blockCount;
5067 for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) {
5068 currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount;
5069 if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) {
5070 /* Extent i does not overlap and is not contiguous, so keep it. */
5071 continue;
6d2010ae 5072 } else {
316670eb
A
5073 /* We need to remove extent i and combine it with the input extent. */
5074 if (hfsmp->vcbFreeExt[i].startBlock < startBlock)
5075 startBlock = hfsmp->vcbFreeExt[i].startBlock;
5076 if (currentEnd > endBlock)
5077 endBlock = currentEnd;
39236c6e 5078
316670eb
A
5079 remove_free_extent_list(hfsmp, i);
5080 /*
5081 * We just removed the extent at index i. The extent at
5082 * index i+1 just got shifted to index i. So decrement i
5083 * to undo the loop's "++i", and the next iteration will
5084 * examine index i again, which contains the next extent
5085 * in the list.
5086 */
5087 --i;
6d2010ae
A
5088 }
5089 }
316670eb 5090 add_free_extent_list(hfsmp, startBlock, endBlock - startBlock);
39236c6e 5091
6d2010ae 5092 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
39236c6e 5093
6d2010ae
A
5094out_not_locked:
5095 sanity_check_free_ext(hfsmp, 0);
39236c6e 5096
6d2010ae
A
5097 if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
5098 KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
39236c6e 5099
d1ecb069
A
5100 return retval;
5101}
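/*
 * A hypothetical sketch of the coalescing pass above.  The block numbers are
 * invented; only add_free_extent_cache() itself is taken from this file.
 */
static void
example_add_coalesces_neighbors(struct hfsmount *hfsmp)
{
	/*
	 * Suppose the cache already holds the free extents (100, 50) and
	 * (200, 50), i.e. blocks 100..149 and 200..249.
	 */

	/* Freeing blocks 150..199 bridges the gap between them ... */
	add_free_extent_cache(hfsmp, 150, 50);

	/*
	 * ... so both cached extents are pulled out of the list and a single
	 * merged extent (100, 150) -- blocks 100..249 -- is inserted instead.
	 */
}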
6d2010ae
A
5102
5103/* Debug function to check if the free extent cache is good or not */
5104static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
5105{
5106 u_int32_t i, j;
5107
39236c6e
A
5108 /* Do not do anything if debug is not on */
5109 if (ALLOC_DEBUG == 0) {
6d2010ae
A
5110 return;
5111 }
5112
5113 lck_spin_lock(&hfsmp->vcbFreeExtLock);
39236c6e 5114
316670eb
A
5115 if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents)
5116 panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt);
39236c6e 5117
6d2010ae
A
5118 /*
5119 * Iterate the Free extent cache and ensure no entries are bogus or refer to
5120 * allocated blocks.
5121 */
5122 for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
5123 u_int32_t start, nblocks;
5124
5125 start = hfsmp->vcbFreeExt[i].startBlock;
5126 nblocks = hfsmp->vcbFreeExt[i].blockCount;
5127
6d2010ae
A
5128 /* Check if any of the blocks in free extent cache are allocated.
5129 * This should not be enabled always because it might take
5130 * very long for large extents that get added to the list.
5131 *
5132 * We have to drop vcbFreeExtLock while we call hfs_isallocated
5133 * because it is going to do I/O. Note that the free extent
5134 * cache could change. That's a risk we take when using this
5135 * debugging code. (Another alternative would be to try to
5136 * detect when the free extent cache changed, and perhaps
5137 * restart if the list changed while we dropped the lock.)
5138 */
5139 if (check_allocated) {
5140 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5141 if (hfs_isallocated(hfsmp, start, nblocks)) {
5142 panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
39236c6e 5143 hfsmp, i, start, nblocks);
6d2010ae
A
5144 }
5145 lck_spin_lock(&hfsmp->vcbFreeExtLock);
5146 }
5147
5148 /* Check if any part of the extent is beyond allocLimit */
5149 if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
5150 panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
5151 hfsmp, i, start, nblocks, hfsmp->allocLimit);
5152 }
5153
5154 /* Check if there are any duplicate start blocks */
5155 for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
5156 if (start == hfsmp->vcbFreeExt[j].startBlock) {
5157 panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n",
39236c6e
A
5158 hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
5159 hfsmp->vcbFreeExt[j].blockCount);
6d2010ae
A
5160 }
5161 }
5162
5163 /* Check if the entries are out of order */
5164 if ((i+1) != hfsmp->vcbFreeExtCnt) {
5165 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
5166 /* sparse devices are sorted by starting block number (ascending) */
5167 if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
5168 panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
39236c6e
A
5169 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5170 hfsmp->vcbFreeExt[i+1].blockCount);
6d2010ae
A
5171 }
5172 } else {
5173 /* normally sorted by block count (descending) */
5174 if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
5175 panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
39236c6e
A
5176 hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
5177 hfsmp->vcbFreeExt[i+1].blockCount);
6d2010ae
A
5178 }
5179 }
5180 }
5181 }
5182 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
5183}
39236c6e 5184
04b8595b
A
5185#define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit))
5186#define kHighBitInDoubleWordMask 0x8000000000000000ull
5187
5188static int clzll(uint64_t x)
5189{
5190 if (x == 0)
5191 return 64;
5192 else
5193 return __builtin_clzll(x);
5194}
5195
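/*
 * A few worked values for the helpers above; the operands are arbitrary
 * examples.  Bit 0 is the most significant bit of the 64-bit word, matching
 * the big-endian layout of the on-disk bitmap.
 */
static void
example_bit_helpers(void)
{
	assert(BIT_RIGHT_MASK(0)  == 0xffffffffffffffffull);	/* every bit          */
	assert(BIT_RIGHT_MASK(4)  == 0x0fffffffffffffffull);	/* bits 4..63         */
	assert(BIT_RIGHT_MASK(63) == 0x0000000000000001ull);	/* only the last bit  */

	assert(clzll(0) == 64);					/* an all-clear word   */
	assert(clzll(0x0010000000000000ull) == 11);		/* first set bit is 11 */
}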
5196#if !HFS_ALLOC_TEST
5197
5198static errno_t get_more_bits(bitmap_context_t *bitmap_ctx)
5199{
5200 uint32_t start_bit;
5201 uint32_t iosize = 0;
5202 uint32_t byte_offset;
5203 uint32_t last_bitmap_block;
5204 int error;
5205 struct hfsmount *hfsmp = bitmap_ctx->hfsmp;
5206#if !HFS_ALLOC_TEST
5207 uint64_t lock_elapsed;
5208#endif
5209
5210
5211 if (bitmap_ctx->bp)
5212 ReleaseScanBitmapRange(bitmap_ctx->bp);
5213
5214 if (msleep(NULL, NULL, PINOD | PCATCH,
5215 "hfs_fsinfo", NULL) == EINTR) {
5216 return EINTR;
5217 }
5218
5219#if !HFS_ALLOC_TEST
5220 /*
 5221 * Let someone else use the allocation map once we have held it for more than HFS_FSINFO_MAX_LOCKHELD_TIME.
5222 * lock_start is initialized in hfs_find_free_extents().
5223 */
5224 absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed);
5225
5226 if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) {
5227
5228 hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags);
5229
5230 /* add tsleep here to force context switch and fairness */
5231 tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1);
5232
5233 hfs_journal_lock(hfsmp);
5234
5235 /* Flush the journal and wait for all I/Os to finish up */
5236 error = hfs_journal_flush(hfsmp, TRUE);
5237 if (error) {
5238 hfs_journal_unlock(hfsmp);
5239 return error;
5240 }
5241
5242 /*
5243 * Take bitmap lock to ensure it is not being modified while journal is still held.
5244 * Since we are reading larger than normal blocks from the bitmap, which
5245 * might confuse other parts of the bitmap code using normal blocks, we
5246 * take exclusive lock here.
5247 */
5248 bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5249
5250 bitmap_ctx->lock_start = mach_absolute_time();
5251
5252 /* Release the journal lock */
5253 hfs_journal_unlock(hfsmp);
5254
5255 /*
 5256 * The bitmap is read here in large blocks (up to 1MB),
 5257 * unlike the runtime path, which reads the bitmap in
 5258 * 4K blocks. If the bitmap is read both ways at the
 5259 * same time, it can result in multiple buf_t of
 5260 * different sizes and potentially cause data corruption.
5261 * To avoid this, we invalidate all the existing buffers
5262 * associated with the bitmap vnode.
5263 */
5264 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
5265 if (error) {
5266 /* hfs_systemfile_unlock will be called in the caller */
5267 return error;
5268 }
5269 }
5270#endif
5271
5272 start_bit = bitmap_ctx->run_offset;
5273
5274 if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) {
5275 bitmap_ctx->chunk_end = 0;
5276 bitmap_ctx->bp = NULL;
5277 bitmap_ctx->bitmap = NULL;
5278 return 0;
5279 }
5280
5281 assert(start_bit % 8 == 0);
5282
5283 /*
5284 * Compute how much I/O we should generate here.
5285 * hfs_scan_range_size will validate that the start bit
5286 * converted into a byte offset into the bitmap file,
5287 * is aligned on a VBMIOSize boundary.
5288 */
5289 error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize);
5290 if (error)
5291 return error;
5292
5293 /* hfs_scan_range_size should have verified startbit. Convert it to bytes */
5294 byte_offset = start_bit / kBitsPerByte;
5295
5296 /*
5297 * When the journal replays blocks, it does so by writing directly to the disk
5298 * device (bypassing any filesystem vnodes and such). When it finishes its I/Os
5299 * it also immediately re-reads and invalidates the range covered by the bp so
5300 * it does not leave anything lingering in the cache (for iosize reasons).
5301 *
5302 * As such, it is safe to do large I/Os here with ReadBitmapRange.
5303 *
5304 * NOTE: It is not recommended, but it is possible to call the function below
5305 * on sections of the bitmap that may be in core already as long as the pages are not
5306 * dirty. In that case, we'd notice that something starting at that
5307 * logical block of the bitmap exists in the metadata cache, and we'd check
5308 * if the iosize requested is the same as what was already allocated for it.
5309 * Odds are pretty good we're going to request something larger. In that case,
5310 * we just free the existing memory associated with the buf and reallocate a
5311 * larger range. This function should immediately invalidate it as soon as we're
5312 * done scanning, so this shouldn't cause any coherency issues.
5313 */
5314 error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp);
5315 if (error)
5316 return error;
5317
5318 /*
5319 * At this point, we have a giant wired buffer that represents some portion of
5320 * the bitmap file that we want to analyze. We may not have gotten all 'iosize'
5321 * bytes though, so clip our ending bit to what we actually read in.
5322 */
5323 last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte;
5324
5325 /* Cap the last block to the total number of blocks if required */
5326 if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks)
5327 last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks;
5328
5329 bitmap_ctx->chunk_current = 0; // new chunk of bitmap
5330 bitmap_ctx->chunk_end = last_bitmap_block - start_bit;
5331
5332 return 0;
5333}
5334
5335#endif // !HFS_ALLOC_TEST
5336
5337// Returns number of contiguous bits set at start
5338static int bit_count_set(void *bitmap, int start, int end)
5339{
5340 if (start == end)
5341 return 0;
5342
5343 assert(end > start);
5344
5345 const int start_bit = start & 63;
5346 const int end_bit = end & 63;
5347
5348 uint64_t *p = (uint64_t *)bitmap + start / 64;
5349 uint64_t x = ~OSSwapBigToHostInt64(*p);
5350
5351 if ((start & ~63) == (end & ~63)) {
5352 // Start and end in same 64 bits
5353 x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
5354 return clzll(x) - start_bit;
5355 }
5356
5357 // Deal with initial unaligned bit
5358 x &= BIT_RIGHT_MASK(start_bit);
5359
5360 if (x)
5361 return clzll(x) - start_bit;
5362
5363 // Go fast
5364 ++p;
5365 int count = 64 - start_bit;
5366 int nquads = (end - end_bit - start - 1) / 64;
5367
5368 while (nquads--) {
5369 if (*p != 0xffffffffffffffffull) {
5370 x = ~OSSwapBigToHostInt64(*p);
5371 return count + clzll(x);
5372 }
5373 ++p;
5374 count += 64;
5375 }
5376
5377 if (end_bit) {
5378 x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
5379 count += clzll(x);
5380 }
5381
5382 return count;
5383}
5384
 5385/* Returns the length of a run of cleared bits:
5386 * bitmap is a single chunk of memory being examined
5387 * start: the start bit relative to the current buffer to be examined; start is inclusive.
5388 * end: the end bit relative to the current buffer to be examined; end is not inclusive.
5389 */
5390static int bit_count_clr(void *bitmap, int start, int end)
5391{
5392 if (start == end)
5393 return 0;
5394
5395 assert(end > start);
5396
5397 const int start_bit = start & 63;
5398 const int end_bit = end & 63;
5399
5400 uint64_t *p = (uint64_t *)bitmap + start / 64;
5401 uint64_t x = OSSwapBigToHostInt64(*p);
5402
5403 if ((start & ~63) == (end & ~63)) {
5404 // Start and end in same 64 bits
5405 x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit);
5406
5407 return clzll(x) - start_bit;
5408 }
5409
5410 // Deal with initial unaligned bit
5411 x &= BIT_RIGHT_MASK(start_bit);
5412
5413 if (x)
5414 return clzll(x) - start_bit;
5415
5416 // Go fast
5417 ++p;
5418 int count = 64 - start_bit;
5419 int nquads = (end - end_bit - start - 1) / 64;
5420
5421 while (nquads--) {
5422 if (*p) {
5423 x = OSSwapBigToHostInt64(*p);
5424 return count + clzll(x);
5425 }
5426 ++p;
5427 count += 64;
5428 }
5429
5430 if (end_bit) {
5431 x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit);
5432
5433 count += clzll(x);
5434 }
5435
5436 return count;
5437}
5438
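/*
 * A small, self-contained example of the two run counters above, operating on
 * a two-word big-endian bitmap built by hand.  The bit patterns are arbitrary.
 */
static void
example_run_counters(void)
{
	uint64_t bitmap[2];

	/* Word 0: top four bits set, rest clear.  Word 1: all clear. */
	bitmap[0] = OSSwapHostToBigInt64(0xf000000000000000ull);
	bitmap[1] = OSSwapHostToBigInt64(0);

	/* Four set bits starting at bit 0 ... */
	assert(bit_count_set(bitmap, 0, 128) == 4);

	/* ... followed by a clear run that spans the word boundary:
	 * bits 4..127, i.e. 124 clear bits. */
	assert(bit_count_clr(bitmap, 4, 128) == 124);
}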
5439#if !HFS_ALLOC_TEST
5440static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set)
5441{
5442 uint32_t end, start_summary_bit, end_summary_bit;
5443 errno_t error = 0;
5444
5445 if (count == 0)
5446 goto out;
5447
5448 if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE))
5449 return 0;
5450
5451 if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) {
5452 error = EINVAL;
5453 goto out;
5454 }
5455
5456 end = start + count - 1;
5457 if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) {
5458 error = EINVAL;
5459 goto out;
5460 }
5461
5462 // if summary table bit has been updated with free block previously, leave it.
5463 if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set)
5464 start_summary_bit++;
5465
5466 for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++)
5467 hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set);
5468
5469 if (!set)
5470 bitmap_ctx->last_free_summary_bit = end_summary_bit;
5471
5472out:
5473 return error;
5474
5475}
5476#endif //!HFS_ALLOC_TEST
5477
5478/*
5479 * Read in chunks of the bitmap into memory, and find a run of cleared/set bits;
5480 * the run can extend across chunk boundaries.
5481 * bit_count_clr can be passed to get a run of cleared bits.
5482 * bit_count_set can be passed to get a run of set bits.
5483 */
5484static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count)
5485{
5486 int count;
5487 errno_t error = 0;
5488
5489 *bit_count = 0;
5490
5491 do {
5492 if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) {
5493 if ((error = get_more_bits(bitmap_ctx)) != 0)
5494 goto out;
5495 }
5496
5497 if (bitmap_ctx->chunk_end == 0)
5498 break;
5499
5500 count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end);
5501
5502 bitmap_ctx->run_offset += count;
5503 bitmap_ctx->chunk_current += count;
5504 *bit_count += count;
5505
5506 } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count);
5507
5508out:
5509 return error;
5510
5511}
5512
5513// Returns count of number of bits clear
5514static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count)
5515{
5516 return hfs_bit_count(bitmap_ctx, bit_count_clr, count);
5517}
5518
5519// Returns count of number of bits set
5520static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count)
5521{
5522 return hfs_bit_count(bitmap_ctx, bit_count_set, count);
5523}
5524
5525static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx)
5526{
5527 return bitmap_ctx->run_offset;
5528}
5529
5530/*
5531 * Perform a full scan of the bitmap file.
5532 * Note: during the scan of bitmap file, it may drop and reacquire the
5533 * bitmap lock to let someone else use the bitmap for fairness.
 5534 * Currently it is used by HFS_GET_FSINFO statistics gathering, which
5535 * is run while other processes might perform HFS operations.
5536 */
5537
5538errno_t hfs_find_free_extents(struct hfsmount *hfsmp,
5539 void (*callback)(void *data, off_t free_extent_size), void *callback_arg)
5540{
5541 struct bitmap_context bitmap_ctx;
5542 uint32_t count;
5543 errno_t error = 0;
5544
5545 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
5546 error = hfs_init_summary(hfsmp);
5547 if (error)
5548 return error;
5549 }
5550
5551 bzero(&bitmap_ctx, sizeof(struct bitmap_context));
5552
5553 /*
5554 * The journal maintains list of recently deallocated blocks to
5555 * issue DKIOCUNMAPs when the corresponding journal transaction is
5556 * flushed to the disk. To avoid any race conditions, we only
5557 * want one active trim list. Therefore we make sure that the
5558 * journal trim list is sync'ed, empty, and not modifiable for
5559 * the duration of our scan.
5560 *
5561 * Take the journal lock before flushing the journal to the disk.
5562 * We will keep on holding the journal lock till we don't get the
5563 * bitmap lock to make sure that no new journal transactions can
5564 * start. This will make sure that the journal trim list is not
5565 * modified after the journal flush and before getting bitmap lock.
5566 * We can release the journal lock after we acquire the bitmap
5567 * lock as it will prevent any further block deallocations.
5568 */
5569 hfs_journal_lock(hfsmp);
5570
5571 /* Flush the journal and wait for all I/Os to finish up */
5572 error = hfs_journal_flush(hfsmp, TRUE);
5573 if (error) {
5574 hfs_journal_unlock(hfsmp);
5575 return error;
5576 }
5577
5578 /*
5579 * Take bitmap lock to ensure it is not being modified.
5580 * Since we are reading larger than normal blocks from the bitmap, which
5581 * might confuse other parts of the bitmap code using normal blocks, we
5582 * take exclusive lock here.
5583 */
5584 bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5585
5586#if !HFS_ALLOC_TEST
5587 bitmap_ctx.lock_start = mach_absolute_time();
5588#endif
5589
5590 /* Release the journal lock */
5591 hfs_journal_unlock(hfsmp);
5592
5593 /*
 5594 * The bitmap is read here in large blocks (up to 1MB),
 5595 * unlike the runtime path, which reads the bitmap in
 5596 * 4K blocks. If the bitmap is read both ways at the
 5597 * same time, it can result in multiple buf_t of
 5598 * different sizes and potentially cause data corruption.
5599 * To avoid this, we invalidate all the existing buffers
5600 * associated with the bitmap vnode.
5601 */
5602 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
5603 if (error)
5604 goto out;
5605
5606 /*
5607 * Get the list of all free extent ranges. hfs_alloc_scan_range()
5608 * will call hfs_fsinfo_data_add() to account for all the free
5609 * extent ranges found during scan.
5610 */
5611 bitmap_ctx.hfsmp = hfsmp;
5612 bitmap_ctx.run_offset = 0;
5613
5614 while (bitmap_ctx.run_offset < hfsmp->totalBlocks) {
5615
5616 uint32_t start = hfs_bit_offset(&bitmap_ctx);
5617
5618 if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0)
5619 goto out;
5620
5621 if (count)
5622 callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize));
5623
5624 if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0)
5625 goto out;
5626
5627 start = hfs_bit_offset(&bitmap_ctx);
5628
5629 if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0)
5630 goto out;
5631
5632 if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0)
5633 goto out;
5634 }
5635
5636out:
5637 if (bitmap_ctx.lockflags) {
5638 hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags);
5639 }
5640
5641 return error;
5642}
5643
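/*
 * A minimal sketch of driving hfs_find_free_extents() from elsewhere in the
 * kernel.  The accumulator structure and function names below are
 * hypothetical; only hfs_find_free_extents() and its callback signature come
 * from this file.
 */
struct free_space_totals {
	off_t		largest;	/* largest single free extent, in bytes */
	uint64_t	extents;	/* number of free extents reported      */
};

static void
tally_free_extent(void *data, off_t free_extent_size)
{
	struct free_space_totals *t = data;

	if (free_extent_size > t->largest)
		t->largest = free_extent_size;
	t->extents++;
}

static errno_t
example_scan_free_space(struct hfsmount *hfsmp)
{
	struct free_space_totals totals = { 0, 0 };

	/*
	 * Walks the entire allocation bitmap, invoking the callback once per
	 * run of clear bits; the bitmap lock may be dropped and retaken
	 * internally for fairness.
	 */
	return hfs_find_free_extents(hfsmp, tally_free_extent, &totals);
}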