Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
04b8595b | 2 | * Copyright (c) 2000-2015 Apple Inc. All rights reserved. |
1c79356b | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | File: VolumeAllocation.c | |
30 | ||
31 | Contains: Routines for accessing and modifying the volume bitmap. | |
32 | ||
33 | Version: HFS Plus 1.0 | |
34 | ||
316670eb | 35 | Copyright: © 1996-2009 by Apple Computer, Inc., all rights reserved. |
0b4e3aa0 | 36 | |
1c79356b A |
37 | */ |
38 | ||
39 | /* | |
40 | Public routines (an illustrative call sketch follows this comment block): |
41 | BlockAllocate | |
42 | Allocate space on a volume. Can allocate space contiguously. | |
43 | If not contiguous, then allocation may be less than what was | |
44 | asked for. Returns the starting block number, and number of | |
45 | blocks. (Will only do a single extent???) | |
46 | BlockDeallocate | |
47 | Deallocate a contiguous run of allocation blocks. | |
6d2010ae A |
48 | |
49 | BlockMarkAllocated | |
50 | Exported wrapper to mark blocks as in-use. This will correctly determine | |
51 | whether or not the red-black tree is enabled and call the appropriate function | |
52 | if applicable. | |
1c79356b | 53 | BlockMarkFree |
6d2010ae A |
54 | Exported wrapper to mark blocks as freed. This will correctly determine whether or |
55 | not the red-black tree is enabled and call the appropriate function if applicable. | |
56 | ||
57 | ||
58 | ResetVCBFreeExtCache | |
59 | Since the red-black tree obviates the need to maintain the free extent cache, we do | |
60 | not update it if the tree is also live. As a result, if we ever need to destroy the trees | |
61 | we should reset the free extent cache so it doesn't confuse us when we need to fall back to the | |
62 | bitmap scanning allocator. | |
63 | We also reset and disable the free extent cache when volume resizing is | |
64 | in flight. | |
65 | ||
66 | UpdateAllocLimit | |
67 | Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent | |
68 | allocations from occupying space in the region we are modifying during a filesystem resize. | |
69 | At other times, it should be consistent with the total number of allocation blocks in the | |
70 | filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should | |
71 | know about. If growing, scan the new range of bitmap, and if shrinking, reduce the | |
72 | number of items in the tree that we can allocate from. | |
316670eb | 73 | |
39236c6e A |
74 | ScanUnmapBlocks |
75 | Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it | |
76 | tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core | |
77 | summary table of the allocation bitmap. | |
6d2010ae A |
78 | |
79 | Internal routines: | |
6d2010ae | 80 | BlockMarkFreeInternal |
1c79356b | 81 | Mark a contiguous range of blocks as free. The corresponding |
6d2010ae A |
82 | bits in the volume bitmap will be cleared. This will actually do the work |
83 | of modifying the bitmap for us. | |
84 | ||
6d2010ae | 85 | BlockMarkAllocatedInternal |
1c79356b A |
86 | Mark a contiguous range of blocks as allocated. The cor- |
87 | responding bits in the volume bitmap are set. Also tests to see | |
6d2010ae A |
88 | if any of the blocks were previously unallocated. |
89 | BlockFindContiguous | |
1c79356b A |
90 | Find a contiguous range of blocks of a given size. The caller |
91 | specifies where to begin the search (by block number). The | |
6d2010ae A |
92 | block number of the first block in the range is returned. This is only |
93 | called by the bitmap scanning logic as the red-black tree should be able | |
94 | to do this internally by searching its tree. | |
0b4e3aa0 A |
95 | BlockAllocateAny |
96 | Find and allocate a contiguous range of blocks up to a given size. The | |
97 | first range of contiguous free blocks found is allocated, even if there |
98 | are fewer blocks than requested (and even if a contiguous range of blocks | |
99 | of the given size exists elsewhere). | |
6d2010ae A |
100 | BlockAllocateAnyBitmap |
101 | Finds a range of blocks per the above requirements without using the | |
102 | Allocation RB Tree. This relies on the bitmap-scanning logic in order to find | |
103 | any valid range of free space needed. | |
1c79356b A |
104 | BlockAllocateContig |
105 | Find and allocate a contiguous range of blocks of a given size. If | |
106 | a contiguous range of free blocks of the given size isn't found, then | |
39236c6e | 107 | the allocation fails (i.e. it is "all or nothing"). |
0b4e3aa0 A |
108 | BlockAllocateKnown |
109 | Try to allocate space from known free space in the volume's | |
110 | free extent cache. | |
1c79356b A |
111 | ReadBitmapBlock |
112 | Given an allocation block number, read the bitmap block that | |
113 | contains that allocation block into a caller-supplied buffer. | |
0b4e3aa0 A |
114 | |
115 | ReleaseBitmapBlock | |
116 | Release a bitmap block back into the buffer cache. | |
316670eb | 117 | |
39236c6e A |
118 | ReadBitmapRange |
119 | Given an allocation block number, read a range of bitmap that | |
120 | must begin at that allocation block into a caller supplied buffer. | |
121 | ||
122 | ReleaseBitmapRange | |
123 | Release and invalidate a buf_t corresponding to the bitmap | |
124 | back into the UBC in order to prevent coherency issues. | |
125 | ||
316670eb A |
126 | remove_free_extent_cache |
127 | Remove an extent from the free extent cache. Handles overlaps | |
128 | with multiple extents in the cache, and handles splitting an | |
129 | extent in the cache if the extent to be removed is in the middle | |
130 | of a cached extent. | |
131 | ||
132 | add_free_extent_cache | |
133 | Add an extent to the free extent cache. It will merge the | |
134 | input extent with extents already in the cache. | |
39236c6e A |
135 | CheckUnmappedBytes |
136 | Check whether or not the current transaction | |
137 | has allocated blocks that were recently freed. This may have data safety implications. | |
138 | ||
139 | ||
6d2010ae A |
140 | |
141 | Debug/Test Routines | |
142 | hfs_isallocated | |
143 | Test to see if any blocks in a range are allocated. Journal or | |
144 | allocation file lock must be held. | |
145 | ||
146 | hfs_isallocated_scan | |
147 | Test to see if any blocks in a range are allocated. Releases and | |
148 | invalidates the block used when finished. | |
39236c6e A |
149 | |
150 | Optimization Routines | |
6d2010ae A |
151 | hfs_alloc_scan_block |
152 | Given a starting allocation block number, figures out which physical block contains that | |
153 | allocation block's bit, and scans it from the starting bit until either the ending bit or | |
154 | the end of the block. Free space extents are inserted into the appropriate red-black tree. | |
155 | ||
1c79356b A |
156 | */ |
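/*
 * Illustrative call sketch for the public routines above (not part of the
 * original file).  The wrapper name example_extend_fork and its local
 * variable names are assumptions; the BlockAllocate/BlockDeallocate
 * signatures and the HFS_ALLOC_FORCECONTIG flag are taken from this file.
 */
#if 0
static OSErr example_extend_fork(ExtendedVCB *vcb, u_int32_t preferredStart, u_int32_t wantBlocks)
{
	u_int32_t startBlock = 0;	/* actual first block of the allocation */
	u_int32_t numAllocated = 0;	/* actual number of blocks allocated */
	OSErr err;

	/* First ask for a single contiguous extent of exactly wantBlocks. */
	err = BlockAllocate(vcb, preferredStart, wantBlocks, wantBlocks,
	                    HFS_ALLOC_FORCECONTIG, &startBlock, &numAllocated);
	if (err == dskFulErr) {
		/* Retry without the contiguity requirement; fewer than wantBlocks may come back. */
		err = BlockAllocate(vcb, preferredStart, wantBlocks, wantBlocks,
		                    0, &startBlock, &numAllocated);
	}

	/*
	 * When the space is later released (truncation, deletion), the extent is
	 * returned to the bitmap with:
	 *	BlockDeallocate(vcb, startBlock, numAllocated, 0);
	 */
	return err;
}
#endif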
157 | ||
1c79356b A |
158 | #include <sys/types.h> |
159 | #include <sys/buf.h> | |
04b8595b A |
160 | |
161 | ||
162 | #if !HFS_ALLOC_TEST | |
163 | ||
164 | #include "../../hfs_macos_defs.h" | |
1c79356b | 165 | #include <sys/systm.h> |
6d2010ae | 166 | #include <sys/ubc.h> |
060df5ea | 167 | #include <kern/kalloc.h> |
39236c6e | 168 | |
316670eb A |
169 | /* For VM Page size */ |
170 | #include <libkern/libkern.h> | |
1c79356b | 171 | #include "../../hfs.h" |
1c79356b | 172 | #include "../../hfs_endian.h" |
1c79356b | 173 | #include "../headers/FileMgrInternal.h" |
04b8595b A |
174 | #include <vfs/vfs_journal.h> |
175 | ||
176 | #endif // !HFS_ALLOC_TEST | |
177 | ||
178 | #include <sys/sysctl.h> | |
179 | #include <sys/disk.h> | |
180 | #include <sys/uio.h> | |
181 | #include <sys/malloc.h> | |
182 | ||
183 | #include "../../hfs_dbg.h" | |
184 | #include "../../hfs_format.h" | |
6d2010ae | 185 | #include "../../hfs_kdebug.h" |
1c79356b | 186 | |
316670eb | 187 | /* Headers for unmap-on-mount support */ |
316670eb A |
188 | #include <sys/disk.h> |
189 | ||
060df5ea A |
190 | #ifndef CONFIG_HFS_TRIM |
191 | #define CONFIG_HFS_TRIM 0 | |
192 | #endif | |
193 | ||
6d2010ae A |
194 | /* |
195 | * Use sysctl vfs.generic.hfs.kdebug.allocation to control which | |
196 | * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're | |
197 | * disabled by default because there can be a lot of these events, | |
198 | * and we don't want to overwhelm the kernel debug buffer. If you | |
199 | * want to watch these events in particular, just set the sysctl.) | |
200 | */ | |
201 | static int hfs_kdebug_allocation = 0; | |
202 | SYSCTL_DECL(_vfs_generic); | |
203 | SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system"); | |
204 | SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug"); | |
205 | SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations"); | |
206 | enum { | |
207 | /* | |
208 | * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and | |
209 | * BlockDeallocate, including the internal BlockAllocateXxx | |
210 | * routines so we can see how an allocation was satisfied. | |
211 | * | |
212 | * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the | |
213 | * free extent cache. | |
214 | * | |
215 | * HFSDBG_UNMAP_ENABLED: Log events involving the trim list. | |
216 | * | |
217 | * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting | |
218 | * or clearing bits, scanning the bitmap). | |
219 | */ | |
220 | HFSDBG_ALLOC_ENABLED = 1, | |
221 | HFSDBG_EXT_CACHE_ENABLED = 2, | |
222 | HFSDBG_UNMAP_ENABLED = 4, | |
223 | HFSDBG_BITMAP_ENABLED = 8 | |
224 | }; | |
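/*
 * Illustrative usage (not part of the original file): all four event classes
 * above can be enabled at runtime from a root shell with
 *
 *	sysctl -w vfs.generic.hfs.kdebug.allocation=15
 *
 * and the tracepoints in this file are gated in the pattern sketched below.
 */
#if 0
	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
#endif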
1c79356b A |
225 | |
226 | enum { | |
0b4e3aa0 | 227 | kBytesPerWord = 4, |
1c79356b A |
228 | kBitsPerByte = 8, |
229 | kBitsPerWord = 32, | |
0b4e3aa0 A |
230 | |
231 | kBitsWithinWordMask = kBitsPerWord-1 | |
1c79356b A |
232 | }; |
233 | ||
234 | #define kLowBitInWordMask 0x00000001ul | |
235 | #define kHighBitInWordMask 0x80000000ul | |
236 | #define kAllBitsSetInWord 0xFFFFFFFFul | |
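/*
 * Illustrative helper (not part of the original file): the constants above
 * locate an allocation block's bit within the bitmap, where bit 0 of each
 * 32-bit word is the high-order bit.  Arithmetic of this form is used by the
 * BlockMark* routines; the helper name itself is hypothetical.
 */
static inline void hfs_example_locate_bit(u_int32_t blockNum, u_int32_t *wordIndex, u_int32_t *bitMask)
{
	*wordIndex = blockNum / kBitsPerWord;				/* which 32-bit word, counting from the start of the bitmap */
	*bitMask = kHighBitInWordMask >> (blockNum & kBitsWithinWordMask);	/* mask for that block's bit within the word */
}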
237 | ||
39236c6e | 238 | #define HFS_MIN_SUMMARY_BLOCKSIZE 4096 |
1c79356b | 239 | |
6d2010ae A |
240 | #define ALLOC_DEBUG 0 |
241 | ||
1c79356b | 242 | static OSErr ReadBitmapBlock( |
39236c6e A |
243 | ExtendedVCB *vcb, |
244 | u_int32_t bit, | |
245 | u_int32_t **buffer, | |
246 | uintptr_t *blockRef); | |
0b4e3aa0 A |
247 | |
248 | static OSErr ReleaseBitmapBlock( | |
39236c6e A |
249 | ExtendedVCB *vcb, |
250 | uintptr_t blockRef, | |
251 | Boolean dirty); | |
0b4e3aa0 A |
252 | |
253 | static OSErr BlockAllocateAny( | |
39236c6e A |
254 | ExtendedVCB *vcb, |
255 | u_int32_t startingBlock, | |
256 | u_int32_t endingBlock, | |
257 | u_int32_t maxBlocks, | |
258 | u_int32_t flags, | |
259 | Boolean trustSummary, | |
260 | u_int32_t *actualStartBlock, | |
261 | u_int32_t *actualNumBlocks); | |
1c79356b | 262 | |
6d2010ae | 263 | static OSErr BlockAllocateAnyBitmap( |
39236c6e A |
264 | ExtendedVCB *vcb, |
265 | u_int32_t startingBlock, | |
266 | u_int32_t endingBlock, | |
267 | u_int32_t maxBlocks, | |
268 | u_int32_t flags, | |
269 | u_int32_t *actualStartBlock, | |
270 | u_int32_t *actualNumBlocks); | |
6d2010ae | 271 | |
1c79356b | 272 | static OSErr BlockAllocateContig( |
39236c6e A |
273 | ExtendedVCB *vcb, |
274 | u_int32_t startingBlock, | |
275 | u_int32_t minBlocks, | |
276 | u_int32_t maxBlocks, | |
277 | u_int32_t flags, | |
278 | u_int32_t *actualStartBlock, | |
279 | u_int32_t *actualNumBlocks); | |
6d2010ae | 280 | |
1c79356b | 281 | static OSErr BlockFindContiguous( |
39236c6e A |
282 | ExtendedVCB *vcb, |
283 | u_int32_t startingBlock, | |
284 | u_int32_t endingBlock, | |
285 | u_int32_t minBlocks, | |
286 | u_int32_t maxBlocks, | |
287 | Boolean useMetaZone, | |
288 | Boolean trustSummary, | |
289 | u_int32_t *actualStartBlock, | |
290 | u_int32_t *actualNumBlocks); | |
1c79356b | 291 | |
0b4e3aa0 | 292 | static OSErr BlockAllocateKnown( |
39236c6e A |
293 | ExtendedVCB *vcb, |
294 | u_int32_t maxBlocks, | |
295 | u_int32_t *actualStartBlock, | |
296 | u_int32_t *actualNumBlocks); | |
0b4e3aa0 | 297 | |
6d2010ae | 298 | static OSErr BlockMarkAllocatedInternal ( |
39236c6e A |
299 | ExtendedVCB *vcb, |
300 | u_int32_t startingBlock, | |
301 | register u_int32_t numBlocks); | |
6d2010ae A |
302 | |
303 | static OSErr BlockMarkFreeInternal( | |
39236c6e A |
304 | ExtendedVCB *vcb, |
305 | u_int32_t startingBlock, | |
306 | u_int32_t numBlocks, | |
307 | Boolean do_validate); | |
6d2010ae | 308 | |
6d2010ae | 309 | |
39236c6e A |
310 | static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize, |
311 | uint32_t **buffer, struct buf **blockRef); | |
312 | ||
313 | static OSErr ReleaseScanBitmapRange( struct buf *bp ); | |
316670eb A |
314 | |
315 | static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset, | |
39236c6e | 316 | u_int32_t numBlocks, struct jnl_trim_list *list); |
316670eb A |
317 | |
318 | static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list); | |
319 | ||
39236c6e A |
320 | static int hfs_alloc_scan_range(struct hfsmount *hfsmp, |
321 | u_int32_t startbit, | |
322 | u_int32_t *bitToScan, | |
323 | struct jnl_trim_list *list); | |
316670eb | 324 | |
39236c6e A |
325 | static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize); |
326 | static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next); | |
6d2010ae | 327 | |
39236c6e A |
328 | /* Bitmap Re-use Detection */ |
329 | static inline int extents_overlap (uint32_t start1, uint32_t len1, | |
330 | uint32_t start2, uint32_t len2) { | |
331 | return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) ); | |
332 | } | |
6d2010ae | 333 | |
6d2010ae | 334 | |
39236c6e A |
335 | int hfs_isallocated_scan (struct hfsmount *hfsmp, |
336 | u_int32_t startingBlock, | |
337 | u_int32_t *bp_buf); | |
338 | ||
339 | /* Summary Table Functions */ | |
340 | static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse); | |
341 | static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index); | |
342 | static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock); | |
343 | static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc); | |
344 | static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length); | |
345 | static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks); | |
346 | static int hfs_rebuild_summary (struct hfsmount *hfsmp); | |
347 | ||
348 | #if 0 | |
349 | static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock); | |
350 | #endif | |
6d2010ae | 351 | |
39236c6e A |
352 | /* Used in external mount code to initialize the summary table */ |
353 | int hfs_init_summary (struct hfsmount *hfsmp); | |
6d2010ae | 354 | |
39236c6e A |
355 | #if ALLOC_DEBUG |
356 | void hfs_validate_summary (struct hfsmount *hfsmp); | |
357 | #endif | |
6d2010ae | 358 | |
6d2010ae A |
359 | |
360 | /* Functions for manipulating free extent cache */ | |
361 | static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount); | |
362 | static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount); | |
363 | static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated); | |
364 | ||
04b8595b A |
365 | /* Functions for getting free extents */ |
366 | ||
367 | typedef struct bitmap_context { | |
368 | void *bitmap; // current bitmap chunk | |
369 | uint32_t run_offset; // offset (in bits) from start of bitmap to start of current run | |
370 | uint32_t chunk_current; // next bit to scan in the chunk | |
371 | uint32_t chunk_end; // number of valid bits in this chunk | |
372 | struct hfsmount *hfsmp; | |
373 | struct buf *bp; | |
374 | uint32_t last_free_summary_bit; // last marked free summary bit | |
375 | int lockflags; | |
376 | uint64_t lock_start; | |
377 | } bitmap_context_t; | |
378 | ||
379 | ||
380 | static errno_t get_more_bits(bitmap_context_t *bitmap_ctx); | |
381 | static int bit_count_set(void *bitmap, int start, int end); | |
382 | static int bit_count_clr(void *bitmap, int start, int end); | |
383 | static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count); | |
384 | static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count); | |
385 | static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count); | |
386 | static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set); | |
387 | static int clzll(uint64_t x); | |
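/*
 * Illustrative sketch only (the real static clzll defined later in this file
 * may differ): count the leading zero bits of a 64-bit word, assuming the
 * compiler provides __builtin_clzll.  A count-leading-zeros primitive of this
 * kind is the usual building block for bit_count_set/bit_count_clr above.
 */
#if 0
static int clzll(uint64_t x)
{
	if (x == 0)
		return 64;		/* no set bits: every position counts as a leading zero */
	return __builtin_clzll(x);	/* __builtin_clzll is undefined for 0, hence the check */
}
#endif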
388 | ||
6d2010ae | 389 | #if ALLOC_DEBUG |
6d2010ae A |
390 | /* |
391 | * Validation Routine to verify that the TRIM list maintained by the journal | |
392 | * is in good shape relative to what we think the bitmap should have. We should | |
393 | * never encounter allocated blocks in the TRIM list, so if we ever encounter them, | |
394 | * we panic. | |
395 | */ | |
316670eb | 396 | int trim_validate_bitmap (struct hfsmount *hfsmp); |
6d2010ae A |
397 | int trim_validate_bitmap (struct hfsmount *hfsmp) { |
398 | u_int64_t blockno_offset; | |
399 | u_int64_t numblocks; | |
400 | int i; | |
401 | int count; | |
402 | u_int32_t startblk; | |
403 | u_int32_t blks; | |
404 | int err = 0; | |
405 | uint32_t alloccount = 0; | |
406 | ||
407 | if (hfsmp->jnl) { | |
408 | struct journal *jnl = (struct journal*)hfsmp->jnl; | |
409 | if (jnl->active_tr) { | |
410 | struct jnl_trim_list *trim = &(jnl->active_tr->trim); | |
411 | count = trim->extent_count; | |
412 | for (i = 0; i < count; i++) { | |
413 | blockno_offset = trim->extents[i].offset; | |
414 | blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset; | |
415 | blockno_offset = blockno_offset / hfsmp->blockSize; | |
416 | numblocks = trim->extents[i].length / hfsmp->blockSize; | |
417 | ||
418 | startblk = (u_int32_t)blockno_offset; | |
419 | blks = (u_int32_t) numblocks; | |
420 | err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount); | |
421 | ||
422 | if (err == 0 && alloccount != 0) { | |
423 | panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk); | |
424 | } | |
425 | } | |
426 | } | |
427 | } | |
428 | return 0; | |
429 | } | |
1c79356b | 430 | |
6d2010ae | 431 | #endif |
060df5ea | 432 | |
316670eb | 433 | |
060df5ea | 434 | /* |
316670eb A |
435 | ;________________________________________________________________________________ |
436 | ; | |
437 | ; Routine: hfs_unmap_free_extent | |
438 | ; | |
439 | ; Function: Make note of a range of allocation blocks that should be | |
440 | ; unmapped (trimmed). That is, the given range of blocks no | |
441 | ; longer have useful content, and the device can unmap the | |
442 | ; previous contents. For example, a solid state disk may reuse | |
443 | ; the underlying storage for other blocks. | |
444 | ; | |
445 | ; This routine is only supported for journaled volumes. The extent | |
446 | ; being freed is passed to the journal code, and the extent will | |
447 | ; be unmapped after the current transaction is written to disk. | |
448 | ; | |
449 | ; Input Arguments: | |
450 | ; hfsmp - The volume containing the allocation blocks. | |
451 | ; startingBlock - The first allocation block of the extent being freed. | |
452 | ; numBlocks - The number of allocation blocks of the extent being freed. | |
453 | ;________________________________________________________________________________ | |
454 | */ | |
060df5ea A |
455 | static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) |
456 | { | |
6d2010ae A |
457 | u_int64_t offset; |
458 | u_int64_t length; | |
316670eb A |
459 | u_int64_t device_sz; |
460 | int err = 0; | |
39236c6e | 461 | |
6d2010ae A |
462 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) |
463 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); | |
39236c6e | 464 | |
316670eb A |
465 | if (ALLOC_DEBUG) { |
466 | if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { | |
467 | panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks); | |
468 | } | |
469 | } | |
39236c6e | 470 | |
6d2010ae | 471 | if (hfsmp->jnl != NULL) { |
316670eb | 472 | device_sz = hfsmp->hfs_logical_bytes; |
6d2010ae A |
473 | offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; |
474 | length = (u_int64_t) numBlocks * hfsmp->blockSize; | |
475 | ||
316670eb A |
476 | /* Validate that the trim is in a valid range of bytes */ |
477 | if ((offset >= device_sz) || ((offset + length) > device_sz)) { | |
39236c6e | 478 | printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length); |
316670eb A |
479 | err = EINVAL; |
480 | } | |
481 | ||
482 | if (err == 0) { | |
483 | err = journal_trim_add_extent(hfsmp->jnl, offset, length); | |
484 | if (err) { | |
39236c6e | 485 | printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN); |
316670eb | 486 | } |
060df5ea A |
487 | } |
488 | } | |
39236c6e | 489 | |
6d2010ae A |
490 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) |
491 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0); | |
060df5ea A |
492 | } |
493 | ||
060df5ea | 494 | /* |
316670eb A |
495 | ;________________________________________________________________________________ |
496 | ; | |
497 | ; Routine: hfs_track_unmap_blocks | |
498 | ; | |
499 | ; Function: Make note of a range of allocation blocks that should be | |
500 | ; unmapped (trimmed). That is, the given range of blocks no | |
501 | ; longer have useful content, and the device can unmap the | |
502 | ; previous contents. For example, a solid state disk may reuse | |
503 | ; the underlying storage for other blocks. | |
504 | ; | |
505 | ; This routine is only supported for journaled volumes. | |
506 | ; | |
507 | ; *****NOTE*****: | |
508 | ; This function should *NOT* be used when the volume is fully | |
509 | ; mounted. This function is intended to support a bitmap iteration | |
510 | ; at mount time to fully inform the SSD driver of the state of all blocks | |
511 | ; and assumes that there is no allocation/deallocation |
512 | ; interference during its iteration. |
513 | ; | |
514 | ; Input Arguments: | |
515 | ; hfsmp - The volume containing the allocation blocks. | |
516 | ; offset - The first allocation block of the extent being freed. | |
517 | ; numBlocks - The number of allocation blocks of the extent being freed. | |
518 | ; list - The list of currently tracked trim ranges. | |
519 | ;________________________________________________________________________________ | |
520 | */ | |
521 | static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, | |
39236c6e A |
522 | u_int32_t numBlocks, struct jnl_trim_list *list) { |
523 | ||
524 | u_int64_t offset; | |
525 | u_int64_t length; | |
526 | int error = 0; | |
527 | ||
528 | if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) { | |
529 | int extent_no = list->extent_count; | |
530 | offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; | |
531 | length = (u_int64_t) numBlocks * hfsmp->blockSize; | |
532 | ||
533 | ||
534 | list->extents[extent_no].offset = offset; | |
535 | list->extents[extent_no].length = length; | |
536 | list->extent_count++; | |
537 | if (list->extent_count == list->allocated_count) { | |
538 | error = hfs_issue_unmap (hfsmp, list); | |
539 | } | |
540 | } | |
541 | ||
542 | return error; | |
316670eb A |
543 | } |
544 | ||
545 | /* | |
546 | ;________________________________________________________________________________ | |
547 | ; | |
548 | ; Routine: hfs_issue_unmap | |
549 | ; | |
550 | ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list | |
551 | ; | |
552 | ; Input Arguments: | |
553 | ; hfsmp - The volume containing the allocation blocks. | |
554 | ; list - The list of currently tracked trim ranges. | |
555 | ;________________________________________________________________________________ | |
556 | */ | |
557 | ||
fe8ab488 A |
558 | static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) |
559 | { | |
39236c6e A |
560 | dk_unmap_t unmap; |
561 | int error = 0; | |
316670eb | 562 | |
fe8ab488 A |
563 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { |
564 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); | |
565 | } | |
566 | ||
39236c6e A |
567 | if (list->extent_count > 0) { |
568 | bzero(&unmap, sizeof(unmap)); | |
569 | unmap.extents = list->extents; | |
570 | unmap.extentsCount = list->extent_count; | |
316670eb | 571 | |
fe8ab488 A |
572 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { |
573 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0); | |
574 | } | |
575 | ||
576 | #if CONFIG_PROTECT | |
577 | /* | |
578 | * If we have not yet completed the first scan through the bitmap, then | |
579 | * optionally inform the block driver below us that this is an initialization | |
580 | * TRIM scan, if it can deal with this information. | |
581 | */ | |
582 | if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) { | |
583 | unmap.options |= _DK_UNMAP_INITIALIZE; | |
584 | } | |
585 | #endif | |
39236c6e A |
586 | /* Issue a TRIM and flush them out */ |
587 | error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); | |
588 | ||
589 | bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t))); | |
fe8ab488 | 590 | bzero (&unmap, sizeof(unmap)); |
39236c6e A |
591 | list->extent_count = 0; |
592 | } | |
fe8ab488 A |
593 | |
594 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { | |
595 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0); | |
596 | } | |
597 | ||
39236c6e A |
598 | return error; |
599 | } | |
316670eb A |
600 | |
601 | /* | |
602 | ;________________________________________________________________________________ | |
603 | ; | |
604 | ; Routine: hfs_unmap_alloc_extent | |
605 | ; | |
606 | ; Function: Make note of a range of allocation blocks, some of | |
607 | ; which may have previously been passed to hfs_unmap_free_extent, | |
608 | ; is now in use on the volume. The given blocks will be removed | |
609 | ; from any pending DKIOCUNMAP. | |
610 | ; | |
611 | ; Input Arguments: | |
612 | ; hfsmp - The volume containing the allocation blocks. | |
613 | ; startingBlock - The first allocation block of the extent being allocated. | |
614 | ; numBlocks - The number of allocation blocks being allocated. | |
615 | ;________________________________________________________________________________ | |
616 | */ | |
39236c6e | 617 | |
060df5ea A |
618 | static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) |
619 | { | |
6d2010ae A |
620 | u_int64_t offset; |
621 | u_int64_t length; | |
622 | int err; | |
39236c6e | 623 | |
6d2010ae A |
624 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) |
625 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); | |
39236c6e | 626 | |
6d2010ae A |
627 | if (hfsmp->jnl != NULL) { |
628 | offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; | |
629 | length = (u_int64_t) numBlocks * hfsmp->blockSize; | |
39236c6e | 630 | |
6d2010ae A |
631 | err = journal_trim_remove_extent(hfsmp->jnl, offset, length); |
632 | if (err) { | |
39236c6e | 633 | printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN); |
060df5ea A |
634 | } |
635 | } | |
39236c6e | 636 | |
6d2010ae A |
637 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) |
638 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0); | |
060df5ea A |
639 | } |
640 | ||
641 | ||
1c79356b A |
642 | /* |
643 | ;________________________________________________________________________________ | |
644 | ; | |
6d2010ae | 645 | ; Routine: hfs_trim_callback |
1c79356b | 646 | ; |
6d2010ae A |
647 | ; Function: This function is called when a transaction that freed extents |
648 | ; (via hfs_unmap_free_extent/journal_trim_add_extent) has been | |
649 | ; written to the on-disk journal. This routine will add those | |
650 | ; extents to the free extent cache so that they can be reused. | |
1c79356b | 651 | ; |
6d2010ae A |
652 | ; CAUTION: This routine is called while the journal's trim lock |
653 | ; is held shared, so that no other thread can reuse any portion | |
654 | ; of those extents. We must be very careful about which locks | |
655 | ; we take from within this callback, to avoid deadlock. The | |
656 | ; call to add_free_extent_cache will end up taking the cache's | |
657 | ; lock (just long enough to add these extents to the cache). | |
1c79356b | 658 | ; |
6d2010ae A |
659 | ; CAUTION: If the journal becomes invalid (eg., due to an I/O |
660 | ; error when trying to write to the journal), this callback | |
661 | ; will stop getting called, even if extents got freed before | |
662 | ; the journal became invalid! | |
1c79356b | 663 | ; |
6d2010ae A |
664 | ; Input Arguments: |
665 | ; arg - The hfsmount of the volume containing the extents. | |
666 | ; extent_count - The number of extents freed in the transaction. | |
667 | ; extents - An array of extents (byte ranges) that were freed. | |
1c79356b A |
668 | ;________________________________________________________________________________ |
669 | */ | |
39236c6e | 670 | |
6d2010ae A |
671 | __private_extern__ void |
672 | hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents) | |
b0d623f7 | 673 | { |
6d2010ae A |
674 | uint32_t i; |
675 | uint32_t startBlock, numBlocks; | |
676 | struct hfsmount *hfsmp = arg; | |
39236c6e | 677 | |
6d2010ae A |
678 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) |
679 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0); | |
39236c6e | 680 | |
6d2010ae A |
681 | for (i=0; i<extent_count; ++i) { |
682 | /* Convert the byte range in *extents back to a range of allocation blocks. */ | |
683 | startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize; | |
684 | numBlocks = extents[i].length / hfsmp->blockSize; | |
685 | (void) add_free_extent_cache(hfsmp, startBlock, numBlocks); | |
b0d623f7 | 686 | } |
6d2010ae A |
687 | |
688 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) | |
689 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
b0d623f7 A |
690 | } |
691 | ||
1c79356b | 692 | |
39236c6e A |
693 | /* |
694 | ;________________________________________________________________________________ | |
695 | ; | |
696 | ; Routine: CheckUnmappedBytes | |
697 | ; | |
698 | ; Function: From the specified inputs, determine if the extent in question overlaps | |
699 | ; space that was recently freed, where the recently freed space may still be | |
700 | ; lingering in an uncommitted journal transaction. This may have data safety | |
701 | ; implications. The intended use is to decide whether or not to force a journal flush | |
702 | ; before allowing file data I/O to be issued. If we did not do this | |
703 | ; then it would be possible to issue the file I/O ahead of the | |
704 | ; journal, resulting in data being overwritten if the transaction either | |
705 | ; is not committed or cannot be replayed. | |
706 | ; | |
707 | ; NOTE: This function assumes that the journal and catalog/extent locks are held. | |
708 | ; | |
709 | ; Input Arguments: | |
710 | ; hfsmp - The volume containing the allocation blocks. | |
711 | ; foffset - start of the extent in question (in allocation blocks) | |
712 | ; numbytes - number of blocks in the extent. | |
713 | ; recently_freed: - output pointer containing whether or not the blocks were freed recently | |
714 | ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks) | |
715 | ; | |
716 | ; Output: | |
717 | ; | |
718 | ; Returns 0 if we could determine extent validity for this (or a previous transaction) | |
719 | ; Returns errno if there was an error | |
720 | ; | |
721 | ; If returned 0, then recently freed will contain a boolean that indicates | |
722 | ; that it was recently freed. | |
723 | ;________________________________________________________________________________ | |
724 | */ | |
725 | ||
726 | u_int32_t | |
727 | CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) { | |
728 | uint64_t device_offset; | |
729 | uint64_t numbytes; | |
730 | uint32_t err = 0; | |
731 | uint64_t lba_overlap_end; | |
732 | ||
733 | if (hfsmp->jnl != NULL) { | |
734 | /* | |
735 | * Convert the allocation block # and the number of blocks into device-relative | |
736 | * offsets so that they can be compared using the TRIM list. | |
737 | */ | |
738 | uint64_t device_sz = hfsmp->hfs_logical_bytes; | |
739 | device_offset = blockno * ((uint64_t)hfsmp->blockSize); | |
740 | device_offset += hfsmp->hfsPlusIOPosOffset; | |
741 | numbytes = (((uint64_t)hfsmp->blockSize) * numblocks); | |
742 | ||
743 | /* | |
744 | * Since we check that the device_offset isn't too large, it's safe to subtract it | |
745 | * from the size in the second check. | |
746 | */ | |
747 | if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) { | |
748 | return EINVAL; | |
749 | } | |
750 | ||
751 | /* Ask the journal if this extent overlaps with any pending TRIMs */ | |
752 | if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) { | |
753 | *recently_freed = 1; | |
754 | ||
755 | /* Convert lba_overlap_end back into allocation blocks */ | |
756 | uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset; | |
757 | end_offset = end_offset / ((uint64_t) hfsmp->blockSize); | |
758 | *overlap_end = (uint32_t) end_offset; | |
759 | } | |
760 | else { | |
761 | *recently_freed = 0; | |
762 | } | |
763 | err = 0; | |
764 | } | |
765 | else { | |
766 | /* There may not be a journal. In that case, always return success. */ | |
767 | *recently_freed = 0; | |
768 | } | |
769 | return err; | |
770 | ||
771 | } | |
772 | ||
773 | ||
316670eb A |
774 | /* |
775 | ;________________________________________________________________________________ | |
776 | ; | |
39236c6e | 777 | ; Routine: ScanUnmapBlocks |
316670eb | 778 | ; |
39236c6e | 779 | ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying |
316670eb A |
780 | ; device as needed so that the underlying disk device is as |
781 | ; up-to-date as possible with which blocks are unmapped. | |
39236c6e | 782 | ; Additionally build up the summary table as needed. |
316670eb | 783 | ; |
fe8ab488 A |
784 | ; This function reads the bitmap in large block sizes |
785 | ; (up to 1MB), unlike the runtime code, which reads the bitmap |
786 | ; in 4K blocks. So if this function is called |
787 | ; after the volume is mounted and actively modified, the |
788 | ; caller needs to invalidate all of the existing buffers |
789 | ; associated with the bitmap vnode before calling this |
790 | ; function. If the buffers are not invalidated, buf_t |
791 | ; collisions and potential data corruption can result. |
792 | ; | |
316670eb A |
793 | ; Input Arguments: |
794 | ; hfsmp - The volume containing the allocation blocks. | |
795 | ;________________________________________________________________________________ | |
796 | */ | |
797 | ||
798 | __private_extern__ | |
39236c6e A |
799 | u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) |
800 | { | |
316670eb A |
801 | u_int32_t blocks_scanned = 0; |
802 | int error = 0; | |
39236c6e A |
803 | struct jnl_trim_list trimlist; |
804 | ||
fe8ab488 A |
805 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { |
806 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); | |
807 | } | |
808 | ||
39236c6e A |
809 | /* |
810 | *struct jnl_trim_list { | |
811 | uint32_t allocated_count; | |
812 | uint32_t extent_count; | |
813 | dk_extent_t *extents; | |
814 | }; | |
815 | */ | |
816 | ||
817 | /* | |
818 | * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM | |
819 | * which is now enabled for most architectures. Instead, any trim related | |
820 | * work should be tied to whether the underlying storage media supports | |
821 | * UNMAP, as any solid state device would on desktop or embedded. | |
822 | * | |
823 | * We do this because we may want to scan the full bitmap on desktop | |
824 | * for spinning media for the purposes of building up the | |
825 | * summary table. | |
826 | * | |
827 | * We also avoid sending TRIMs down to the underlying media if the mount is read-only. | |
828 | */ | |
829 | ||
830 | if ((hfsmp->hfs_flags & HFS_UNMAP) && | |
831 | ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { | |
832 | /* If the underlying device supports unmap and the mount is read-write, initialize */ | |
833 | int alloc_count = PAGE_SIZE / sizeof(dk_extent_t); | |
834 | void *extents = kalloc (alloc_count * sizeof(dk_extent_t)); | |
835 | if (extents == NULL) { | |
836 | return ENOMEM; | |
837 | } | |
838 | bzero (&trimlist, sizeof(trimlist)); | |
839 | trimlist.extents = (dk_extent_t*)extents; | |
840 | trimlist.allocated_count = alloc_count; | |
841 | trimlist.extent_count = 0; | |
842 | } | |
843 | ||
844 | while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){ | |
845 | ||
846 | error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist); | |
847 | ||
848 | if (error) { | |
849 | printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN); | |
850 | break; | |
851 | } | |
852 | } | |
853 | ||
854 | if ((hfsmp->hfs_flags & HFS_UNMAP) && | |
855 | ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { | |
856 | if (error == 0) { | |
857 | hfs_issue_unmap(hfsmp, &trimlist); | |
858 | } | |
859 | if (trimlist.extents) { | |
860 | kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t))); | |
861 | } | |
862 | } | |
863 | ||
864 | /* | |
865 | * This is in an #if block because hfs_validate_summary prototype and function body | |
866 | * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly. | |
867 | */ | |
868 | #if ALLOC_DEBUG | |
869 | sanity_check_free_ext(hfsmp, 1); | |
870 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
871 | /* Validate the summary table too! */ | |
872 | hfs_validate_summary(hfsmp); | |
873 | printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN); | |
316670eb | 874 | } |
39236c6e A |
875 | #endif |
876 | ||
fe8ab488 A |
877 | if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { |
878 | KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0); | |
879 | } | |
880 | ||
316670eb A |
881 | return error; |
882 | } | |
883 | ||
6d2010ae A |
884 | /* |
885 | ;________________________________________________________________________________ | |
886 | ; | |
887 | ; Routine: BlockAllocate | |
888 | ; | |
889 | ; Function: Allocate space on a volume. If contiguous allocation is requested, | |
890 | ; at least the requested number of blocks will be allocated or an |
891 | ; error will be returned. If contiguous allocation is not forced, | |
892 | ; the space will be allocated with the first largest extent available | |
893 | ; at the requested starting allocation block. If there is not enough | |
894 | ; room there, a block allocation of less than the requested size will be | |
895 | ; allocated. | |
896 | ; | |
897 | ; If the requested starting block is 0 (for new file allocations), | |
898 | ; the volume's allocation block pointer will be used as a starting | |
899 | ; point. | |
900 | ; | |
901 | ; Input Arguments: | |
902 | ; vcb - Pointer to ExtendedVCB for the volume to allocate space on | |
903 | ; fcb - Pointer to FCB for the file for which storage is being allocated | |
904 | ; startingBlock - Preferred starting allocation block, 0 = no preference | |
905 | ; minBlocks - Number of blocks requested. If the allocation is non-contiguous, | |
906 | ; less than this may actually be allocated | |
907 | ; maxBlocks - The maximum number of blocks to allocate. If there is additional free | |
908 | ; space after minBlocks, then up to maxBlocks blocks should really |
909 | ; be allocated. (Used by ExtendFileC to round up allocations to a multiple | |
910 | ; of the file's clump size.) | |
911 | ; flags - Flags to specify options like contiguous, use metadata zone, | |
912 | ; skip free block check, etc. | |
913 | ; | |
914 | ; Output: | |
915 | ; (result) - Error code, zero for successful allocation | |
916 | ; *startBlock - Actual starting allocation block | |
39236c6e | 917 | ; *actualNumBlocks - Actual number of allocation blocks allocated |
6d2010ae A |
918 | ; |
919 | ; Side effects: | |
920 | ; The volume bitmap is read and updated; the volume bitmap cache may be changed. | |
921 | ;________________________________________________________________________________ | |
922 | */ | |
1c79356b | 923 | OSErr BlockAllocate ( |
39236c6e A |
924 | ExtendedVCB *vcb, /* which volume to allocate space on */ |
925 | u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ | |
926 | u_int32_t minBlocks, /* desired number of blocks to allocate */ | |
927 | u_int32_t maxBlocks, /* maximum number of blocks to allocate */ | |
928 | u_int32_t flags, /* option flags */ | |
929 | u_int32_t *actualStartBlock, /* actual first block of allocation */ | |
930 | u_int32_t *actualNumBlocks) | |
931 | /* | |
932 | * actualNumBlocks is the number of blocks actually allocated; | |
933 | * if forceContiguous was zero, then this may represent fewer than minBlocks | |
934 | */ | |
1c79356b | 935 | { |
2d21ac55 | 936 | u_int32_t freeBlocks; |
1c79356b | 937 | OSErr err; |
1c79356b | 938 | Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated |
6d2010ae | 939 | struct hfsmount *hfsmp; |
0b4c1975 A |
940 | Boolean useMetaZone; |
941 | Boolean forceContiguous; | |
39236c6e | 942 | Boolean forceFlush; |
0b4c1975 | 943 | |
6d2010ae A |
944 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
945 | KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0); | |
39236c6e | 946 | |
0b4c1975 A |
947 | if (flags & HFS_ALLOC_FORCECONTIG) { |
948 | forceContiguous = true; | |
949 | } else { | |
950 | forceContiguous = false; | |
951 | } | |
952 | ||
953 | if (flags & HFS_ALLOC_METAZONE) { | |
954 | useMetaZone = true; | |
955 | } else { | |
956 | useMetaZone = false; | |
957 | } | |
1c79356b | 958 | |
39236c6e A |
959 | if (flags & HFS_ALLOC_FLUSHTXN) { |
960 | forceFlush = true; | |
961 | } | |
962 | else { | |
963 | forceFlush = false; | |
964 | } | |
965 | ||
966 | ||
1c79356b A |
967 | // |
968 | // Initialize outputs in case we get an error | |
969 | // | |
970 | *actualStartBlock = 0; | |
971 | *actualNumBlocks = 0; | |
6d2010ae A |
972 | hfsmp = VCBTOHFS (vcb); |
973 | freeBlocks = hfs_freeblks(hfsmp, 0); | |
39236c6e A |
974 | |
975 | ||
0b4c1975 A |
976 | /* Skip free block check if blocks are being allocated for relocating |
977 | * data during truncating a volume. | |
978 | * | |
979 | * During hfs_truncatefs(), the volume free block count is updated | |
980 | * before relocating data to reflect the total number of free blocks | |
981 | * that will exist on the volume after resize is successful. This | |
982 | * means that we have reserved allocation blocks required for relocating | |
983 | * the data and hence there is no need to check the free blocks. | |
984 | * It will also prevent resize failure when the number of blocks in | |
985 | * an extent being relocated is more than the free blocks that will | |
986 | * exist after the volume is resized. | |
55e303ae | 987 | */ |
0b4c1975 A |
988 | if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { |
989 | // If the disk is already full, don't bother. | |
990 | if (freeBlocks == 0) { | |
991 | err = dskFulErr; | |
992 | goto Exit; | |
993 | } | |
994 | if (forceContiguous && freeBlocks < minBlocks) { | |
995 | err = dskFulErr; | |
996 | goto Exit; | |
997 | } | |
998 | ||
999 | /* | |
1000 | * Clip if necessary so we don't over-subscribe the free blocks. | |
1001 | */ | |
1002 | if (minBlocks > freeBlocks) { | |
1003 | minBlocks = freeBlocks; | |
1004 | } | |
1005 | if (maxBlocks > freeBlocks) { | |
1006 | maxBlocks = freeBlocks; | |
1007 | } | |
55e303ae A |
1008 | } |
1009 | ||
1c79356b A |
1010 | // |
1011 | // If caller didn't specify a starting block number, then use the volume's | |
1012 | // next block to allocate from. | |
1013 | // | |
1014 | if (startingBlock == 0) { | |
39236c6e A |
1015 | hfs_lock_mount (hfsmp); |
1016 | ||
6d2010ae | 1017 | /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */ |
b0d623f7 A |
1018 | if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) { |
1019 | startingBlock = vcb->sparseAllocation; | |
6d2010ae A |
1020 | } |
1021 | else { | |
b0d623f7 A |
1022 | startingBlock = vcb->nextAllocation; |
1023 | } | |
39236c6e | 1024 | hfs_unlock_mount(hfsmp); |
1c79356b A |
1025 | updateAllocPtr = true; |
1026 | } | |
39236c6e A |
1027 | |
1028 | ||
2d21ac55 | 1029 | if (startingBlock >= vcb->allocLimit) { |
55e303ae A |
1030 | startingBlock = 0; /* overflow so start at beginning */ |
1031 | } | |
0b4e3aa0 | 1032 | |
1c79356b A |
1033 | // |
1034 | // If the request must be contiguous, then find a sequence of free blocks | |
1035 | // that is long enough. Otherwise, find the first free block. | |
1036 | // | |
1037 | if (forceContiguous) { | |
55e303ae | 1038 | err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks, |
39236c6e | 1039 | flags, actualStartBlock, actualNumBlocks); |
9bccf70c | 1040 | /* |
6d2010ae A |
1041 | * If we allocated from a new position then also update the roving allocator. |
1042 | * This will keep the roving allocation pointer up-to-date even | |
1043 | * if we are using the new R/B tree allocator, since | |
1044 | * it doesn't matter to us here, how the underlying allocator found | |
1045 | * the block to vend out. | |
9bccf70c | 1046 | */ |
55e303ae | 1047 | if ((err == noErr) && |
39236c6e A |
1048 | (*actualStartBlock > startingBlock) && |
1049 | ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || | |
1050 | (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { | |
b0d623f7 | 1051 | updateAllocPtr = true; |
55e303ae | 1052 | } |
39236c6e | 1053 | } else { |
0b4e3aa0 A |
1054 | /* |
1055 | * Scan the bitmap once, gather the N largest free extents, then | |
1056 | * allocate from these largest extents. Repeat as needed until | |
1057 | * we get all the space we needed. We could probably build up | |
1058 | * that list when the higher level caller tried (and failed) a | |
1059 | * contiguous allocation first. | |
6d2010ae A |
1060 | * |
1061 | * Note that the free-extent cache will cease to be updated if |
1062 | * we are using the red-black tree for allocations. If we jettison | |
1063 | * the tree, then we will reset the free-extent cache and start over. | |
0b4e3aa0 | 1064 | */ |
39236c6e A |
1065 | |
1066 | /* Disable HFS_ALLOC_FLUSHTXN if needed */ | |
1067 | if (forceFlush) { | |
1068 | flags &= ~HFS_ALLOC_FLUSHTXN; | |
1069 | } | |
1070 | ||
1071 | /* | |
1072 | * BlockAllocateKnown only examines the free extent cache; anything in there will | |
1073 | * have been committed to stable storage already. | |
1074 | */ | |
0b4e3aa0 | 1075 | err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks); |
39236c6e | 1076 | |
6d2010ae A |
1077 | /* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */ |
1078 | ||
1079 | if (err == dskFulErr) { | |
1080 | /* | |
1081 | * Now we have to do a bigger scan. Start at startingBlock and go up until the | |
39236c6e A |
1082 | * allocation limit. We 'trust' the summary bitmap in this call, if it tells us |
1083 | * that it could not find any free space. | |
6d2010ae | 1084 | */ |
2d21ac55 | 1085 | err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit, |
39236c6e A |
1086 | maxBlocks, flags, true, |
1087 | actualStartBlock, actualNumBlocks); | |
6d2010ae A |
1088 | } |
1089 | if (err == dskFulErr) { | |
1090 | /* | |
39236c6e A |
1091 | * Vary the behavior here if the summary table is on or off. |
1092 | * If it is on, then we don't trust it it if we get into this case and | |
1093 | * basically do a full scan for maximum coverage. | |
1094 | * If it is off, then we trust the above and go up until the startingBlock. | |
6d2010ae | 1095 | */ |
39236c6e A |
1096 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { |
1097 | err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks, | |
1098 | flags, false, | |
1099 | actualStartBlock, actualNumBlocks); | |
1100 | } | |
1101 | else { | |
1102 | err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks, | |
1103 | flags, false, | |
1104 | actualStartBlock, actualNumBlocks); | |
1105 | } | |
1106 | ||
1107 | /* | |
1108 | * Last Resort: Find/use blocks that may require a journal flush. | |
1109 | */ | |
1110 | if (err == dskFulErr && forceFlush) { | |
1111 | flags |= HFS_ALLOC_FLUSHTXN; | |
1112 | err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks, | |
1113 | flags, false, | |
1114 | actualStartBlock, actualNumBlocks); | |
1115 | } | |
6d2010ae | 1116 | } |
1c79356b A |
1117 | } |
1118 | ||
91447636 | 1119 | Exit: |
39236c6e A |
1120 | if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) { |
1121 | errno_t ec; | |
1122 | _dk_cs_map_t cm; | |
1123 | uint64_t mapped_blocks; | |
1124 | ||
1125 | cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset; | |
1126 | cm.cm_extent.length = (uint64_t)*actualNumBlocks * hfsmp->blockSize; | |
1127 | cm.cm_bytes_mapped = 0; | |
1128 | ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current()); | |
1129 | if (ec != 0 && ec != ENOSPC) { | |
1130 | printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec); | |
1131 | err = ec; | |
1132 | goto Exit_CS; | |
1133 | } | |
1134 | mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize; | |
1135 | /* CoreStorage returned more blocks than requested */ | |
1136 | if (mapped_blocks > *actualNumBlocks) { | |
1137 | printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n", | |
1138 | mapped_blocks, *actualNumBlocks); | |
1139 | } | |
1140 | if (*actualNumBlocks > mapped_blocks) { | |
1141 | if (forceContiguous && mapped_blocks < minBlocks) { | |
1142 | mapped_blocks = 0; | |
1143 | } | |
1144 | } | |
1145 | uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks; | |
1146 | uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks; | |
1147 | if (numBlocksToFree > 0) { | |
1148 | err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags); | |
1149 | if (err != noErr) { | |
1150 | printf ("BlockDeallocate failed (err=%d)\n", err); | |
1151 | goto Exit_CS; | |
1152 | } | |
1153 | } | |
1154 | *actualNumBlocks = mapped_blocks; | |
1155 | if (*actualNumBlocks == 0 && err == noErr) { | |
1156 | err = dskFulErr; | |
1157 | } | |
1158 | } | |
1159 | Exit_CS: | |
91447636 A |
1160 | // if we actually allocated something then go update the |
1161 | // various bits of state that we maintain regardless of | |
1162 | // whether there was an error (i.e. partial allocations | |
1163 | // still need to update things like the free block count). | |
1164 | // | |
1165 | if (*actualNumBlocks != 0) { | |
1c79356b A |
1166 | // |
1167 | // If we used the volume's roving allocation pointer, then we need to update it. | |
1168 | // Adding in the length of the current allocation might reduce the next allocate | |
1169 | // call by avoiding a re-scan of the already allocated space. However, the clump | |
1170 | // just allocated can quite conceivably end up being truncated or released when | |
1171 | // the file is closed or its EOF changed. Leaving the allocation pointer at the | |
1172 | // start of the last allocation will avoid unnecessary fragmentation in this case. | |
1173 | // | |
39236c6e | 1174 | hfs_lock_mount (hfsmp); |
1c79356b | 1175 | |
6d2010ae | 1176 | lck_spin_lock(&hfsmp->vcbFreeExtLock); |
b0d623f7 A |
1177 | if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) { |
1178 | vcb->sparseAllocation = *actualStartBlock; | |
1179 | } | |
6d2010ae | 1180 | lck_spin_unlock(&hfsmp->vcbFreeExtLock); |
b0d623f7 A |
1181 | if (*actualNumBlocks < vcb->hfs_freed_block_count) { |
1182 | vcb->hfs_freed_block_count -= *actualNumBlocks; | |
1183 | } else { | |
1184 | vcb->hfs_freed_block_count = 0; | |
1185 | } | |
6d2010ae | 1186 | |
55e303ae | 1187 | if (updateAllocPtr && |
6d2010ae A |
1188 | ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || |
1189 | (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { | |
2d21ac55 | 1190 | HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock); |
55e303ae | 1191 | } |
b0d623f7 | 1192 | |
6d2010ae | 1193 | (void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks); |
0b4c1975 A |
1194 | |
1195 | /* | |
1196 | * Update the number of free blocks on the volume | |
1197 | * | |
1198 | * Skip updating the free blocks count if the block are | |
1199 | * being allocated to relocate data as part of hfs_truncatefs() | |
1200 | */ | |
1201 | if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { | |
1202 | vcb->freeBlocks -= *actualNumBlocks; | |
1203 | } | |
1c79356b | 1204 | MarkVCBDirty(vcb); |
39236c6e | 1205 | hfs_unlock_mount(hfsmp); |
91447636 A |
1206 | |
1207 | hfs_generate_volume_notifications(VCBTOHFS(vcb)); | |
1c79356b | 1208 | } |
39236c6e | 1209 | |
6d2010ae A |
1210 | if (ALLOC_DEBUG) { |
1211 | if (err == noErr) { | |
1212 | if (*actualStartBlock >= hfsmp->totalBlocks) { | |
1213 | panic ("BlockAllocate: vending invalid blocks!"); | |
1214 | } | |
1215 | if (*actualStartBlock >= hfsmp->allocLimit) { | |
1216 | panic ("BlockAllocate: vending block past allocLimit!"); | |
1217 | } | |
39236c6e | 1218 | |
6d2010ae A |
1219 | if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) { |
1220 | panic ("BlockAllocate: vending too many invalid blocks!"); | |
1221 | } | |
39236c6e | 1222 | |
6d2010ae A |
1223 | if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) { |
1224 | panic ("BlockAllocate: vending too many invalid blocks past allocLimit!"); | |
1225 | } | |
1226 | } | |
1227 | } | |
39236c6e | 1228 | |
6d2010ae A |
1229 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
1230 | KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); | |
1231 | ||
1c79356b A |
1232 | return err; |
1233 | } | |
1234 | ||
1235 | ||
1c79356b A |
1236 | /* |
1237 | ;________________________________________________________________________________ | |
1238 | ; | |
6d2010ae | 1239 | ; Routine: BlockDeallocate |
1c79356b A |
1240 | ; |
1241 | ; Function: Update the bitmap to deallocate a run of disk allocation blocks | |
1242 | ; | |
1243 | ; Input Arguments: | |
1244 | ; vcb - Pointer to ExtendedVCB for the volume to free space on | |
1245 | ; firstBlock - First allocation block to be freed | |
1246 | ; numBlocks - Number of allocation blocks to free up (must be > 0!) | |
1247 | ; | |
1248 | ; Output: | |
1249 | ; (result) - Result code | |
1250 | ; | |
1251 | ; Side effects: | |
1252 | ; The volume bitmap is read and updated; the volume bitmap cache may be changed. | |
6d2010ae | 1253 | ; The Allocator's red-black trees may also be modified as a result. |
1c79356b A |
1254 | ;________________________________________________________________________________ |
1255 | */ | |
1256 | ||
1257 | OSErr BlockDeallocate ( | |
39236c6e A |
1258 | ExtendedVCB *vcb, // Which volume to deallocate space on |
1259 | u_int32_t firstBlock, // First block in range to deallocate | |
1260 | u_int32_t numBlocks, // Number of contiguous blocks to deallocate | |
1261 | u_int32_t flags) | |
1c79356b A |
1262 | { |
1263 | OSErr err; | |
6d2010ae A |
1264 | struct hfsmount *hfsmp; |
1265 | hfsmp = VCBTOHFS(vcb); | |
39236c6e | 1266 | |
6d2010ae A |
1267 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
1268 | KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0); | |
1269 | ||
1c79356b A |
1270 | // |
1271 | // If no blocks to deallocate, then exit early | |
1272 | // | |
1273 | if (numBlocks == 0) { | |
1274 | err = noErr; | |
1275 | goto Exit; | |
1276 | } | |
39236c6e A |
1277 | |
1278 | ||
6d2010ae A |
1279 | if (ALLOC_DEBUG) { |
1280 | if (firstBlock >= hfsmp->totalBlocks) { | |
1281 | panic ("BlockDeallocate: freeing invalid blocks!"); | |
1282 | } | |
39236c6e | 1283 | |
6d2010ae A |
1284 | if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) { |
1285 | panic ("BlockDeallocate: freeing too many invalid blocks!"); | |
1286 | } | |
1287 | } | |
6d2010ae A |
1288 | |
1289 | /* | |
39236c6e A |
1290 | * If we're using the summary bitmap, then try to mark the bits |
1291 | * as potentially usable/free before actually deallocating them. | |
1292 | * It is better to be slightly speculative here for correctness. | |
6d2010ae | 1293 | */ |
1c79356b | 1294 | |
39236c6e A |
1295 | (void) hfs_release_summary (hfsmp, firstBlock, numBlocks); |
1296 | ||
6d2010ae | 1297 | err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true); |
39236c6e A |
1298 | |
1299 | if (err) { | |
1c79356b | 1300 | goto Exit; |
39236c6e | 1301 | } |
1c79356b A |
1302 | |
1303 | // | |
1304 | // Update the volume's free block count, and mark the VCB as dirty. | |
1305 | // | |
39236c6e | 1306 | hfs_lock_mount(hfsmp); |
0b4c1975 A |
1307 | /* |
1308 | * Do not update the free block count. This flag is specified
1309 | * when a volume is being truncated. | |
1310 | */ | |
1311 | if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { | |
1312 | vcb->freeBlocks += numBlocks; | |
1313 | } | |
1314 | ||
b0d623f7 | 1315 | vcb->hfs_freed_block_count += numBlocks; |
b0d623f7 A |
1316 | |
1317 | if (vcb->nextAllocation == (firstBlock + numBlocks)) { | |
2d21ac55 | 1318 | HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks)); |
b0d623f7 A |
1319 | } |
1320 | ||
6d2010ae A |
1321 | if (hfsmp->jnl == NULL) { |
1322 | /* | |
1323 | * In the journal case, we'll add the free extent once the journal | |
1324 | * calls us back to tell us it wrote the transaction to disk. | |
1325 | */ | |
1326 | (void) add_free_extent_cache(vcb, firstBlock, numBlocks); | |
39236c6e | 1327 | |
6d2010ae A |
1328 | /* |
1329 | * In the journal case, we'll only update sparseAllocation once the
1330 | * free extent cache becomes empty (when we remove the last entry | |
1331 | * from the cache). Skipping it here means we're less likely to | |
1332 | * find a recently freed extent via the bitmap before it gets added | |
1333 | * to the free extent cache. | |
1334 | */ | |
1335 | if (firstBlock < vcb->sparseAllocation) { | |
1336 | vcb->sparseAllocation = firstBlock; | |
b0d623f7 A |
1337 | } |
1338 | } | |
39236c6e | 1339 | |
1c79356b | 1340 | MarkVCBDirty(vcb); |
39236c6e | 1341 | hfs_unlock_mount(hfsmp); |
91447636 A |
1342 | |
1343 | hfs_generate_volume_notifications(VCBTOHFS(vcb)); | |
1c79356b A |
1344 | Exit: |
1345 | ||
6d2010ae A |
1346 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
1347 | KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0); | |
1348 | ||
1c79356b A |
1349 | return err; |
1350 | } | |
1351 | ||
1352 | ||
2d21ac55 | 1353 | u_int8_t freebitcount[16] = { |
55e303ae A |
1354 | 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */ |
1355 | 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */ | |
1356 | }; | |
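/*
 * Editor's note (illustrative, not from the original source): freebitcount is a
 * per-nibble lookup of how many ZERO (free) bits a 4-bit value contains, so a
 * bitmap byte can be tallied with two table lookups instead of eight bit tests.
 * A rough worked example:
 *
 *	byte = 0xA5;                               // bits 1010 0101
 *	free = freebitcount[byte & 0x0F]           // low nibble 0x5 -> 2 free bits
 *	     + freebitcount[(byte >> 4) & 0x0F];   // high nibble 0xA -> 2 free bits
 *	                                           // free == 4 allocation blocks
 */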
1357 | ||
2d21ac55 | 1358 | u_int32_t |
55e303ae | 1359 | MetaZoneFreeBlocks(ExtendedVCB *vcb) |
1c79356b | 1360 | { |
2d21ac55 A |
1361 | u_int32_t freeblocks; |
1362 | u_int32_t *currCache; | |
b0d623f7 | 1363 | uintptr_t blockRef; |
2d21ac55 A |
1364 | u_int32_t bit; |
1365 | u_int32_t lastbit; | |
55e303ae A |
1366 | int bytesleft; |
1367 | int bytesperblock; | |
2d21ac55 A |
1368 | u_int8_t byte; |
1369 | u_int8_t *buffer; | |
91447636 | 1370 | |
55e303ae A |
1371 | blockRef = 0; |
1372 | bytesleft = freeblocks = 0; | |
91447636 | 1373 | buffer = NULL; |
55e303ae A |
1374 | bit = VCBTOHFS(vcb)->hfs_metazone_start; |
1375 | if (bit == 1) | |
1376 | bit = 0; | |
39236c6e | 1377 | |
55e303ae A |
1378 | lastbit = VCBTOHFS(vcb)->hfs_metazone_end; |
1379 | bytesperblock = vcb->vcbVBMIOSize; | |
1380 | ||
1381 | /* | |
1382 | * Count all the bits from bit to lastbit. | |
1383 | */ | |
1384 | while (bit < lastbit) { | |
1385 | /* | |
1386 | * Get next bitmap block. | |
1387 | */ | |
1388 | if (bytesleft == 0) { | |
1389 | if (blockRef) { | |
1390 | (void) ReleaseBitmapBlock(vcb, blockRef, false); | |
1391 | blockRef = 0; | |
1392 | } | |
1393 | if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) { | |
1394 | return (0); | |
1395 | } | |
2d21ac55 | 1396 | buffer = (u_int8_t *)currCache; |
55e303ae A |
1397 | bytesleft = bytesperblock; |
1398 | } | |
1399 | byte = *buffer++; | |
1400 | freeblocks += freebitcount[byte & 0x0F]; | |
1401 | freeblocks += freebitcount[(byte >> 4) & 0x0F]; | |
1402 | bit += kBitsPerByte; | |
1403 | --bytesleft; | |
1404 | } | |
1405 | if (blockRef) | |
1406 | (void) ReleaseBitmapBlock(vcb, blockRef, false); | |
1407 | ||
1408 | return (freeblocks); | |
1c79356b A |
1409 | } |
1410 | ||
1411 | ||
55e303ae A |
1412 | /* |
1413 | * Obtain the next allocation block (bit) that's | |
1414 | * outside the metadata allocation zone. | |
1415 | */ | |
2d21ac55 | 1416 | static u_int32_t NextBitmapBlock( |
39236c6e A |
1417 | ExtendedVCB *vcb, |
1418 | u_int32_t bit) | |
55e303ae A |
1419 | { |
1420 | struct hfsmount *hfsmp = VCBTOHFS(vcb); | |
1421 | ||
1422 | if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) | |
1423 | return (bit); | |
1424 | /* | |
1425 | * Skip over metadata allocation zone. | |
1426 | */ | |
1427 | if ((bit >= hfsmp->hfs_metazone_start) && | |
39236c6e | 1428 | (bit <= hfsmp->hfs_metazone_end)) { |
55e303ae A |
1429 | bit = hfsmp->hfs_metazone_end + 1; |
1430 | } | |
1431 | return (bit); | |
1432 | } | |
1433 | ||
1c79356b A |
1434 | |
1435 | /* | |
1436 | ;_______________________________________________________________________ | |
1437 | ; | |
1438 | ; Routine: ReadBitmapBlock | |
1439 | ; | |
1440 | ; Function: Read in a bitmap block corresponding to a given allocation | |
0b4e3aa0 | 1441 | ; block (bit). Return a pointer to the bitmap block. |
1c79356b A |
1442 | ; |
1443 | ; Inputs: | |
1444 | ; vcb -- Pointer to ExtendedVCB | |
0b4e3aa0 | 1445 | ; bit -- Allocation block whose bitmap block is desired |
1c79356b A |
1446 | ; |
1447 | ; Outputs: | |
1448 | ; buffer -- Pointer to bitmap block corresponding to "block"
0b4e3aa0 | 1449 | ; blockRef |
1c79356b A |
1450 | ;_______________________________________________________________________ |
1451 | */ | |
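/*
 * Editor's sketch (hypothetical numbers, not from the original source): each
 * bitmap I/O block holds vcbVBMIOSize * kBitsPerByte bits, so the logical bitmap
 * block for a given allocation block is just a divide.  For example, assuming a
 * 4 KiB vcbVBMIOSize:
 *
 *	blockSize = 4096;                  // vcbVBMIOSize, in bytes
 *	bitsPer   = blockSize * 8;         // 32768 allocation blocks per I/O block
 *	block     = 100000 / bitsPer;      // bit 100000 lives in bitmap block 3
 *
 * On HFS Standard the result is then offset by vcbVBMSt to reach the physical block.
 */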
1452 | static OSErr ReadBitmapBlock( | |
39236c6e A |
1453 | ExtendedVCB *vcb, |
1454 | u_int32_t bit, | |
1455 | u_int32_t **buffer, | |
1456 | uintptr_t *blockRef) | |
1c79356b A |
1457 | { |
1458 | OSErr err; | |
0b4e3aa0 A |
1459 | struct buf *bp = NULL; |
1460 | struct vnode *vp = NULL; | |
91447636 | 1461 | daddr64_t block; |
2d21ac55 | 1462 | u_int32_t blockSize; |
1c79356b | 1463 | |
6d2010ae A |
1464 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
1465 | KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0); | |
1466 | ||
0b4e3aa0 | 1467 | /* |
91447636 | 1468 | * volume bitmap blocks are protected by the allocation file lock |
0b4e3aa0 | 1469 | */ |
91447636 | 1470 | REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); |
1c79356b | 1471 | |
2d21ac55 | 1472 | blockSize = (u_int32_t)vcb->vcbVBMIOSize; |
91447636 | 1473 | block = (daddr64_t)(bit / (blockSize * kBitsPerByte)); |
1c79356b | 1474 | |
39236c6e A |
1475 | /* HFS+ / HFSX */ |
1476 | if (vcb->vcbSigWord != kHFSSigWord) { | |
91447636 | 1477 | vp = vcb->hfs_allocation_vp; /* use allocation file vnode */ |
39236c6e A |
1478 | } |
1479 | #if CONFIG_HFS_STD | |
1480 | else { | |
1481 | /* HFS Standard */ | |
0b4e3aa0 A |
1482 | vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */ |
1483 | block += vcb->vcbVBMSt; /* map to physical block */ | |
1c79356b | 1484 | } |
39236c6e | 1485 | #endif |
1c79356b | 1486 | |
91447636 | 1487 | err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp); |
1c79356b | 1488 | |
0b4e3aa0 A |
1489 | if (bp) { |
1490 | if (err) { | |
91447636 | 1491 | buf_brelse(bp); |
2d21ac55 | 1492 | *blockRef = 0; |
0b4e3aa0 A |
1493 | *buffer = NULL; |
1494 | } else { | |
b0d623f7 | 1495 | *blockRef = (uintptr_t)bp; |
2d21ac55 | 1496 | *buffer = (u_int32_t *)buf_dataptr(bp); |
0b4e3aa0 | 1497 | } |
1c79356b A |
1498 | } |
1499 | ||
6d2010ae A |
1500 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
1501 | KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0); | |
1502 | ||
1c79356b A |
1503 | return err; |
1504 | } | |
1505 | ||
1506 | ||
0b4e3aa0 A |
1507 | /* |
1508 | ;_______________________________________________________________________ | |
1509 | ; | |
39236c6e | 1510 | ; Routine: ReadBitmapRange |
0b4e3aa0 | 1511 | ; |
39236c6e A |
1512 | ; Function: Read in a range of the bitmap starting at the given offset. |
1513 | ; Use the supplied size to determine the amount of I/O to generate | |
1514 | ; against the bitmap file. Return a pointer to the bitmap block. | |
0b4e3aa0 A |
1515 | ; |
1516 | ; Inputs: | |
39236c6e A |
1517 | ; hfsmp -- Pointer to hfs mount |
1518 | ; offset -- byte offset into the bitmap file | |
1519 | ; size -- How much I/O to generate against the bitmap file. | |
1520 | ; | |
1521 | ; Outputs: | |
1522 | ; buffer -- Pointer to bitmap block data corresponding to "block"
1523 | ; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call. | |
0b4e3aa0 A |
1524 | ;_______________________________________________________________________ |
1525 | */ | |
39236c6e A |
1526 | static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset, |
1527 | uint32_t iosize, uint32_t **buffer, struct buf **blockRef) | |
0b4e3aa0 | 1528 | { |
6d2010ae | 1529 | |
39236c6e A |
1530 | OSErr err; |
1531 | struct buf *bp = NULL; | |
1532 | struct vnode *vp = NULL; | |
1533 | daddr64_t block; | |
0b4e3aa0 | 1534 | |
39236c6e A |
1535 | /* This function isn't supported for HFS standard */ |
1536 | if (hfsmp->vcbSigWord != kHFSPlusSigWord) { | |
1537 | return EINVAL; | |
0b4e3aa0 A |
1538 | } |
1539 | ||
39236c6e A |
1540 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { |
1541 | KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0); | |
1542 | } | |
1543 | ||
1544 | /* | |
1545 | * volume bitmap blocks are protected by the allocation file lock | |
1546 | */ | |
1547 | REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
1548 | ||
1549 | vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */ | |
1550 | ||
1551 | /* | |
1552 | * The byte offset argument must be converted into bitmap-relative logical | |
1553 | * block numbers before using it in buf_meta_bread. | |
1554 | * | |
1555 | * buf_meta_bread (and the things it calls) will eventually try to | |
1556 | * reconstruct the byte offset into the file by multiplying the logical | |
1557 | * block number passed in below by the vcbVBMIOSize field in the mount | |
1558 | * point. So we prepare for that by converting the byte offset back into | |
1559 | * logical blocks in terms of VBMIOSize units. | |
1560 | * | |
1561 | * The amount of I/O requested and the byte offset should be computed | |
1562 | * based on the helper function in the frame that called us, so we can | |
1563 | * get away with just doing a simple divide here. | |
1564 | */ | |
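	/*
	 * Editor's worked example (hypothetical numbers, not from the original source):
	 * with a 4 KiB vcbVBMIOSize, a byte offset of 81920 into the bitmap file maps
	 * to logical block 81920 / 4096 = 20, and buf_meta_bread() will internally
	 * reconstruct the same byte offset as 20 * 4096 when it issues the I/O.
	 */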
1565 | block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize); | |
1566 | ||
1567 | err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp); | |
1568 | ||
1569 | if (bp) { | |
1570 | if (err) { | |
1571 | buf_brelse(bp); | |
1572 | *blockRef = 0; | |
1573 | *buffer = NULL; | |
1574 | } else { | |
1575 | *blockRef = bp; | |
1576 | *buffer = (u_int32_t *)buf_dataptr(bp); | |
1577 | } | |
1578 | } | |
1579 | ||
1580 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { | |
1581 | KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0); | |
1582 | } | |
1583 | ||
1584 | return err; | |
1585 | } | |
6d2010ae | 1586 | |
6d2010ae | 1587 | |
6d2010ae | 1588 | /* |
39236c6e A |
1589 | ;_______________________________________________________________________ |
1590 | ; | |
1591 | ; Routine: ReleaseBitmapBlock | |
1592 | ; | |
1593 | ; Function: Release a bitmap block.
1594 | ; | |
1595 | ; Inputs: | |
1596 | ; vcb | |
1597 | ; blockRef | |
1598 | ; dirty | |
1599 | ;_______________________________________________________________________ | |
1600 | */ | |
1601 | static OSErr ReleaseBitmapBlock( | |
1602 | ExtendedVCB *vcb, | |
1603 | uintptr_t blockRef, | |
1604 | Boolean dirty) | |
1605 | { | |
1606 | struct buf *bp = (struct buf *)blockRef; | |
6d2010ae | 1607 | |
39236c6e A |
1608 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
1609 | KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0); | |
1610 | ||
1611 | if (blockRef == 0) { | |
1612 | if (dirty) | |
1613 | panic("hfs: ReleaseBitmapBlock: missing bp"); | |
6d2010ae A |
1614 | return (0); |
1615 | } | |
39236c6e A |
1616 | |
1617 | if (bp) { | |
1618 | if (dirty) { | |
1619 | // XXXdbg | |
1620 | struct hfsmount *hfsmp = VCBTOHFS(vcb); | |
1621 | ||
1622 | if (hfsmp->jnl) { | |
1623 | journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); | |
1624 | } else { | |
1625 | buf_bdwrite(bp); | |
1626 | } | |
1627 | } else { | |
1628 | buf_brelse(bp); | |
1629 | } | |
1630 | } | |
1631 | ||
1632 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) | |
1633 | KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
1634 | ||
1635 | return (0); | |
1636 | } | |
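/*
 * Editor's note: a minimal sketch of how ReadBitmapBlock/ReleaseBitmapBlock are
 * paired elsewhere in this file (illustrative only; it mirrors the pattern used
 * by BlockMarkAllocatedInternal below):
 *
 *	u_int32_t *buffer;
 *	uintptr_t blockRef;
 *	if (ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef) == noErr) {
 *		if (hfsmp->jnl)
 *			journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
 *		// ... set or clear bits in 'buffer' ...
 *		(void) ReleaseBitmapBlock(vcb, blockRef, true);	// dirty: journal or buf_bdwrite
 *	}
 */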
1637 | ||
1638 | /* | |
1639 | * ReleaseScanBitmapRange | |
1640 | * | |
1641 | * This is used to release struct bufs that were created for use by | |
1642 | * bitmap scanning code. Because they may be of sizes different from those
1643 | * used by the typical runtime manipulation code, we want to force them to be purged out
1644 | * of the buffer cache ASAP, so we'll release them differently than in the | |
1645 | * ReleaseBitmapBlock case. | |
1646 | * | |
1647 | * Additionally, because we know that we're only reading the blocks and that they | |
1648 | * should have been clean prior to reading them, we will never | |
1649 | * issue a write to them (thus dirtying them). | |
1650 | */ | |
1651 | ||
1652 | static OSErr ReleaseScanBitmapRange(struct buf *bp ) { | |
1653 | ||
1654 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { | |
1655 | KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_START, 0, 0, 0, 0, 0);
1656 | } | |
1657 | ||
6d2010ae A |
1658 | if (bp) { |
1659 | /* Mark the buffer invalid if it isn't locked, then release it */ | |
1660 | if ((buf_flags(bp) & B_LOCKED) == 0) { | |
1661 | buf_markinvalid(bp); | |
1662 | } | |
1663 | buf_brelse(bp); | |
1664 | } | |
39236c6e A |
1665 | |
1666 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { | |
1667 | KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
1668 | } | |
1669 | ||
0b4e3aa0 A |
1670 | return (0); |
1671 | } | |
1672 | ||
1c79356b A |
1673 | /* |
1674 | _______________________________________________________________________ | |
1675 | ||
1676 | Routine: BlockAllocateContig | |
1677 | ||
1678 | Function: Allocate a contiguous group of allocation blocks. The | |
1679 | allocation is all-or-nothing. The caller guarantees that | |
1680 | there are enough free blocks (though they may not be | |
1681 | contiguous, in which case this call will fail). | |
1682 | ||
1683 | Inputs: | |
1684 | vcb Pointer to volume where space is to be allocated | |
1685 | startingBlock Preferred first block for allocation | |
1686 | minBlocks Minimum number of contiguous blocks to allocate | |
1687 | maxBlocks Maximum number of contiguous blocks to allocate | |
39236c6e | 1688 | flags |
1c79356b A |
1689 | |
1690 | Outputs: | |
1691 | actualStartBlock First block of range allocated, or 0 if error | |
1692 | actualNumBlocks Number of blocks allocated, or 0 if error | |
1693 | _______________________________________________________________________ | |
1694 | */ | |
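/*
 * Editor's summary (informal, not from the original source): the search below is
 * roughly a two-pass wraparound scan, with recently deallocated extents held back:
 *
 *	1. look for minBlocks..maxBlocks contiguous free blocks in
 *	   [startingBlock, allocLimit);
 *	2. on dskFulErr, wrap and search again from block 1 (up to startingBlock,
 *	   or up to allocLimit when the summary table can't be trusted);
 *	3. if the only fit overlaps an extent still pending in the journal/TRIM
 *	   list, remember it and keep looking; reuse it only when HFS_ALLOC_FLUSHTXN
 *	   allows it, otherwise return dskFulErr.
 */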
1695 | static OSErr BlockAllocateContig( | |
39236c6e A |
1696 | ExtendedVCB *vcb, |
1697 | u_int32_t startingBlock, | |
1698 | u_int32_t minBlocks, | |
1699 | u_int32_t maxBlocks, | |
1700 | u_int32_t flags, | |
1701 | u_int32_t *actualStartBlock, | |
1702 | u_int32_t *actualNumBlocks) | |
1c79356b | 1703 | { |
39236c6e A |
1704 | OSErr retval = noErr; |
1705 | uint32_t currentStart = startingBlock; | |
1c79356b | 1706 | |
39236c6e A |
1707 | uint32_t foundStart = 0; // values to emit to caller |
1708 | uint32_t foundCount = 0; | |
6d2010ae | 1709 | |
39236c6e A |
1710 | uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this |
1711 | uint32_t collision_count = 0; | |
6d2010ae | 1712 | |
39236c6e A |
1713 | int err; |
1714 | int allowReuse = (flags & HFS_ALLOC_FLUSHTXN); | |
1715 | Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); | |
6d2010ae | 1716 | |
39236c6e A |
1717 | int recently_deleted = 0; |
1718 | struct hfsmount *hfsmp = VCBTOHFS(vcb); | |
6d2010ae | 1719 | |
6d2010ae | 1720 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
39236c6e | 1721 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0); |
6d2010ae | 1722 | |
39236c6e | 1723 | while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) { |
6d2010ae | 1724 | |
39236c6e A |
1725 | /* Try and find something that works. */ |
1726 | do { | |
1727 | /* | |
1728 | * NOTE: If the only contiguous free extent of at least minBlocks | |
1729 | * crosses startingBlock (i.e. starts before, ends after), then we | |
1730 | * won't find it. Earlier versions *did* find this case by letting | |
1731 | * the second search look past startingBlock by minBlocks. But | |
1732 | * with the free extent cache, this can lead to duplicate entries | |
1733 | * in the cache, causing the same blocks to be allocated twice. | |
1734 | */ | |
1735 | retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, | |
1736 | maxBlocks, useMetaZone, true, &foundStart, &foundCount); | |
6d2010ae | 1737 | |
39236c6e A |
1738 | if (retval == dskFulErr && currentStart != 0) { |
1739 | /* | |
1740 | * We constrain the endingBlock so we don't bother looking for ranges | |
1741 | * that would overlap those found in the previous call, if the summary bitmap | |
1742 | * is not on for this volume. If it is, then we assume that it was not
1743 | * trustworthy and do a full scan.
1744 | */ | |
1745 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
1746 | retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, | |
1747 | maxBlocks, useMetaZone, false, &foundStart, &foundCount); | |
6d2010ae | 1748 | } |
39236c6e A |
1749 | else { |
1750 | retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks, | |
1751 | maxBlocks, useMetaZone, false, &foundStart, &foundCount); | |
1752 | } | |
1753 | } | |
1754 | } while (0); | |
1755 | ||
1756 | if (retval != noErr) { | |
1757 | goto bailout; | |
6d2010ae | 1758 | } |
39236c6e A |
1759 | |
1760 | /* Do we overlap with the recently found collision extent? */ | |
1761 | if (collision_start) { | |
1762 | if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) { | |
1763 | /* | |
1764 | * We've looped around, and the only thing we could use was the collision extent. | |
1765 | * Since we are allowed to use it, go ahead and do so now. | |
1766 | */ | |
1767 | if(allowReuse) { | |
1768 | /* | |
1769 | * Then we couldn't find anything except values which might have been
1770 | * recently deallocated. Just return our cached value if we are allowed to.
1771 | */ | |
1772 | foundStart = collision_start; | |
1773 | foundCount = collision_count; | |
1774 | goto bailout; | |
6d2010ae | 1775 | } |
39236c6e A |
1776 | else { |
1777 | /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */ | |
1778 | retval = dskFulErr; | |
1779 | goto bailout; | |
1780 | } | |
6d2010ae A |
1781 | } |
1782 | } | |
6d2010ae | 1783 | |
39236c6e A |
1784 | /* OK, we know we must not have collided. See if this one is recently deleted */
1785 | if (hfsmp->jnl) { | |
1786 | recently_deleted = 0; | |
1787 | uint32_t nextStart; | |
1788 | err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart, | |
1789 | (uint64_t) foundCount, &recently_deleted, &nextStart); | |
1790 | if (err == 0) { | |
1791 | if(recently_deleted != 0) { | |
1792 | /* | |
1793 | * these blocks were recently deleted/deallocated. Cache the extent,
1794 | * but keep searching to see if we can find one that won't collide here.
1795 | */ | |
1796 | if (collision_start == 0) { | |
1797 | collision_start = foundStart; | |
1798 | collision_count = foundCount; | |
1799 | } | |
1800 | recently_deleted = 0; | |
1801 | ||
1802 | /* | |
1803 | * advance currentStart to the point just past the overlap we just found. Note that | |
1804 | * we will automatically loop around to start of the bitmap as needed. | |
1805 | */ | |
1806 | currentStart = nextStart; | |
1807 | /* Unset foundStart/Count to allow it to loop around again. */ | |
1808 | foundStart = 0; | |
1809 | foundCount = 0; | |
6d2010ae | 1810 | } |
6d2010ae | 1811 | } |
39236c6e A |
1812 | } // end jnl/deleted case |
1813 | ||
1814 | /* | |
1815 | * If we found something good, we'd break out of the loop at the top; foundCount | |
1816 | * and foundStart should be set. | |
6d2010ae | 1817 | */ |
6d2010ae | 1818 | |
39236c6e | 1819 | } // end while loop. |
6d2010ae | 1820 | |
39236c6e A |
1821 | bailout: |
1822 | /* mark the blocks as in-use */ | |
1823 | if (retval == noErr) { | |
1824 | *actualStartBlock = foundStart; | |
1825 | *actualNumBlocks = foundCount; | |
1826 | err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks); | |
1827 | ||
1828 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) { | |
1829 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0); | |
6d2010ae A |
1830 | } |
1831 | } | |
6d2010ae | 1832 | |
39236c6e A |
1833 | return retval; |
1834 | ||
1835 | } | |
6d2010ae | 1836 | |
1c79356b | 1837 | |
1c79356b A |
1838 | /* |
1839 | _______________________________________________________________________ | |
1840 | ||
1841 | Routine: BlockAllocateAny | |
1842 | ||
1843 | Function: Allocate one or more allocation blocks. If there are fewer | |
1844 | free blocks than requested, all free blocks will be | |
1845 | allocated. The caller guarantees that there is at least | |
1846 | one free block. | |
1847 | ||
1848 | Inputs: | |
1849 | vcb Pointer to volume where space is to be allocated | |
1850 | startingBlock Preferred first block for allocation | |
1851 | endingBlock Last block to check + 1 | |
1852 | maxBlocks Maximum number of contiguous blocks to allocate | |
55e303ae | 1853 | useMetaZone |
1c79356b A |
1854 | |
1855 | Outputs: | |
1856 | actualStartBlock First block of range allocated, or 0 if error | |
1857 | actualNumBlocks Number of blocks allocated, or 0 if error | |
1858 | _______________________________________________________________________ | |
1859 | */ | |
6d2010ae | 1860 | |
0b4e3aa0 | 1861 | static OSErr BlockAllocateAny( |
39236c6e A |
1862 | ExtendedVCB *vcb, |
1863 | u_int32_t startingBlock, | |
1864 | register u_int32_t endingBlock, | |
1865 | u_int32_t maxBlocks, | |
1866 | u_int32_t flags, | |
1867 | Boolean trustSummary, | |
1868 | u_int32_t *actualStartBlock, | |
1869 | u_int32_t *actualNumBlocks) | |
1c79356b | 1870 | { |
6d2010ae | 1871 | |
39236c6e A |
1872 | /* |
1873 | * If it is enabled, scan through the summary table to find the first free block. | |
1874 | * | |
1875 | * If it reports that there are not any free blocks, we could have a false | |
1876 | * positive, so in that case, use the input arguments as a pass through. | |
1877 | */ | |
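	/*
	 * Editor's note (an assumption based on the summary-table helpers used in this
	 * file): each summary bit appears to describe one vcbVBMIOSize-sized bitmap
	 * block, so with a 4 KiB VBMIOSize a single summary bit covers 32768 allocation
	 * blocks. hfs_find_summary_free() therefore only suggests a coarse starting
	 * point; the bitmap scan below still confirms the individual bits.
	 */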
1878 | uint32_t start_blk = startingBlock; | |
1879 | uint32_t end_blk = endingBlock; | |
1880 | struct hfsmount *hfsmp; | |
1881 | OSErr err; | |
6d2010ae | 1882 | |
39236c6e A |
1883 | hfsmp = (struct hfsmount*)vcb; |
1884 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
1885 | uint32_t suggested_start; | |
1886 | ||
1887 | /* | |
1888 | * If the summary table is enabled, scan through it to find the first free | |
1889 | * block. If there was an error, or we couldn't find anything free in the | |
1890 | * summary table, then just leave the start_blk fields unmodified. We wouldn't | |
1891 | * have gotten to this point unless the mount point made it look like there was possibly
1892 | * free space in the FS. | |
1893 | */ | |
1894 | err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start); | |
1895 | if (err == 0) { | |
1896 | start_blk = suggested_start; | |
1897 | } | |
1898 | else { | |
1899 | /* Differentiate between ENOSPC and a more esoteric error in the above call. */ | |
1900 | if ((err == ENOSPC) && (trustSummary)) { | |
1901 | /* | |
1902 | * The 'trustSummary' argument controls whether we fall back to a full scan
1903 | * when we really, really need the space and think it's somewhere but can't
1904 | * find it in the summary table. If it's true, then we trust the summary table
1905 | * and return dskFulErr when the lookup above fails.
1906 | */ | |
1907 | return dskFulErr; | |
1908 | } | |
1909 | /* | |
1910 | * If either trustSummary was false or we got a different errno, then we | |
1911 | * want to fall through to the real bitmap single i/o code... | |
1912 | */ | |
1913 | } | |
1914 | } | |
1915 | ||
1916 | err = BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks, | |
1917 | flags, actualStartBlock, actualNumBlocks); | |
6d2010ae | 1918 | |
6d2010ae | 1919 | return err; |
6d2010ae | 1920 | } |
39236c6e | 1921 | |
6d2010ae A |
1922 | |
1923 | /* | |
1924 | * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out | |
1925 | * where the free allocation blocks are. Inputs and outputs are the same as for | |
1926 | * BlockAllocateAny.
1927 | */ | |
1928 | ||
1929 | static OSErr BlockAllocateAnyBitmap( | |
39236c6e A |
1930 | ExtendedVCB *vcb, |
1931 | u_int32_t startingBlock, | |
1932 | register u_int32_t endingBlock, | |
1933 | u_int32_t maxBlocks, | |
1934 | u_int32_t flags, | |
1935 | u_int32_t *actualStartBlock, | |
1936 | u_int32_t *actualNumBlocks) | |
6d2010ae | 1937 | { |
1c79356b | 1938 | OSErr err; |
2d21ac55 A |
1939 | register u_int32_t block; // current block number |
1940 | register u_int32_t currentWord; // Value of current word within bitmap block
1941 | register u_int32_t bitMask; // Word with given bits already set (ready to OR in) | |
1942 | register u_int32_t wordsLeft; // Number of words left in this bitmap block | |
1943 | u_int32_t *buffer = NULL; | |
1944 | u_int32_t *currCache = NULL; | |
b0d623f7 | 1945 | uintptr_t blockRef; |
2d21ac55 A |
1946 | u_int32_t bitsPerBlock; |
1947 | u_int32_t wordsPerBlock; | |
0b4e3aa0 | 1948 | Boolean dirty = false; |
b4c24cb9 | 1949 | struct hfsmount *hfsmp = VCBTOHFS(vcb); |
39236c6e A |
1950 | uint32_t summary_block_scan = 0; |
1951 | Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); | |
1952 | Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN); | |
1c79356b | 1953 | |
6d2010ae A |
1954 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
1955 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0); | |
1956 | ||
39236c6e | 1957 | restartSearchAny: |
2d21ac55 A |
1958 | /* |
1959 | * When we're skipping the metadata zone and the start/end | |
1960 | * range overlaps with the metadata zone then adjust the | |
1961 | * start to be outside of the metadata zone. If the range | |
1962 | * is entirely inside the metadata zone then we can deny the | |
1963 | * request (dskFulErr). | |
1964 | */ | |
1965 | if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) { | |
1966 | if (startingBlock <= vcb->hfs_metazone_end) { | |
1967 | if (endingBlock > (vcb->hfs_metazone_end + 2)) | |
1968 | startingBlock = vcb->hfs_metazone_end + 1; | |
1969 | else { | |
1970 | err = dskFulErr; | |
1971 | goto Exit; | |
1972 | } | |
1973 | } | |
1974 | } | |
1975 | ||
1c79356b A |
1976 | // Since this routine doesn't wrap around |
1977 | if (maxBlocks > (endingBlock - startingBlock)) { | |
1978 | maxBlocks = endingBlock - startingBlock; | |
1979 | } | |
1980 | ||
1c79356b A |
1981 | // |
1982 | // Pre-read the first bitmap block | |
1983 | // | |
55e303ae | 1984 | err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef); |
1c79356b | 1985 | if (err != noErr) goto Exit; |
55e303ae | 1986 | buffer = currCache; |
1c79356b A |
1987 | |
1988 | // | |
1989 | // Set up the current position within the block | |
1990 | // | |
1991 | { | |
2d21ac55 | 1992 | u_int32_t wordIndexInBlock; |
39236c6e | 1993 | |
0b4e3aa0 A |
1994 | bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; |
1995 | wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; | |
1c79356b | 1996 | |
0b4e3aa0 | 1997 | wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; |
1c79356b | 1998 | buffer += wordIndexInBlock; |
0b4e3aa0 | 1999 | wordsLeft = wordsPerBlock - wordIndexInBlock; |
1c79356b A |
2000 | currentWord = SWAP_BE32 (*buffer); |
2001 | bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask); | |
2002 | } | |
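	/*
	 * Editor's worked example (hypothetical numbers): with a 4 KiB vcbVBMIOSize,
	 * bitsPerBlock = 32768 and wordsPerBlock = 1024.  For startingBlock = 70000:
	 *
	 *	wordIndexInBlock = (70000 & 32767) / 32 = 4464 / 32 = 139
	 *	wordsLeft        = 1024 - 139 = 885
	 *	bitMask          = 0x80000000 >> (70000 & 31) = 0x80000000 >> 16 = 0x00008000
	 */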
39236c6e A |
2003 | |
2004 | /* | |
2005 | * While loop 1: | |
2006 | * Find the first unallocated block starting at 'block' | |
2007 | */ | |
1c79356b A |
2008 | block=startingBlock; |
2009 | while (block < endingBlock) { | |
2010 | if ((currentWord & bitMask) == 0) | |
2011 | break; | |
55e303ae | 2012 | |
1c79356b A |
2013 | // Next bit |
2014 | ++block; | |
2015 | bitMask >>= 1; | |
2016 | if (bitMask == 0) { | |
2017 | // Next word | |
2018 | bitMask = kHighBitInWordMask; | |
2019 | ++buffer; | |
55e303ae | 2020 | |
1c79356b A |
2021 | if (--wordsLeft == 0) { |
2022 | // Next block | |
55e303ae | 2023 | buffer = currCache = NULL; |
39236c6e A |
2024 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { |
2025 | /* | |
2026 | * If summary_block_scan is non-zero, then we must have | |
2027 | * pulled a bitmap file block into core, and scanned through | |
2028 | * the entire thing. Because we're in this loop, we are | |
2029 | * implicitly trusting that the summary table didn't have any knowledge
2030 | * about this particular block. As a result, update the summary table
2031 | * (lazily, now that we've scanned the bitmap block) with our findings that
2032 | * this particular block is completely used up. | |
2033 | */ | |
2034 | if (summary_block_scan != 0) { | |
2035 | uint32_t summary_bit; | |
2036 | (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit); | |
2037 | hfs_set_summary (hfsmp, summary_bit, 1); | |
2038 | summary_block_scan = 0; | |
2039 | } | |
2040 | } | |
2041 | ||
0b4e3aa0 A |
2042 | err = ReleaseBitmapBlock(vcb, blockRef, false); |
2043 | if (err != noErr) goto Exit; | |
2044 | ||
55e303ae A |
2045 | /* |
2046 | * Skip over metadata blocks. | |
2047 | */ | |
2048 | if (!useMetaZone) { | |
2049 | block = NextBitmapBlock(vcb, block); | |
55e303ae | 2050 | } |
2d21ac55 A |
2051 | if (block >= endingBlock) { |
2052 | err = dskFulErr; | |
2053 | goto Exit; | |
2054 | } | |
2055 | ||
55e303ae | 2056 | err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); |
1c79356b | 2057 | if (err != noErr) goto Exit; |
55e303ae | 2058 | buffer = currCache; |
39236c6e | 2059 | summary_block_scan = block; |
0b4e3aa0 | 2060 | wordsLeft = wordsPerBlock; |
1c79356b | 2061 | } |
1c79356b A |
2062 | currentWord = SWAP_BE32 (*buffer); |
2063 | } | |
2064 | } | |
2065 | ||
2066 | // Did we get to the end of the bitmap before finding a free block? | |
2067 | // If so, then couldn't allocate anything. | |
55e303ae | 2068 | if (block >= endingBlock) { |
1c79356b A |
2069 | err = dskFulErr; |
2070 | goto Exit; | |
2071 | } | |
2072 | ||
39236c6e A |
2073 | |
2074 | /* | |
2075 | * Don't move forward just yet. Verify that either one of the following | |
2076 | * two conditions is true: | |
2077 | * 1) journaling is not enabled | |
2078 | * 2) block is not currently on any pending TRIM list. | |
2079 | */ | |
2080 | if (hfsmp->jnl != NULL && (forceFlush == false)) { | |
2081 | int recently_deleted = 0; | |
2082 | uint32_t nextblk; | |
2083 | err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk); | |
2084 | if ((err == 0) && (recently_deleted)) { | |
2085 | ||
2086 | /* release the bitmap block & unset currCache. we may jump past it. */ | |
2087 | err = ReleaseBitmapBlock(vcb, blockRef, false); | |
2088 | currCache = NULL; | |
2089 | if (err != noErr) { | |
2090 | goto Exit; | |
2091 | } | |
2092 | /* set our start to nextblk, and re-do the search. */ | |
2093 | startingBlock = nextblk; | |
2094 | goto restartSearchAny; | |
2095 | } | |
2096 | } | |
2097 | ||
2098 | ||
2099 | // Return the first block in the allocated range | |
2100 | *actualStartBlock = block; | |
0b4e3aa0 | 2101 | dirty = true; |
39236c6e | 2102 | |
1c79356b A |
2103 | // If we could get the desired number of blocks before hitting endingBlock, |
2104 | // then adjust endingBlock so we won't keep looking. Ideally, the comparison | |
2105 | // would be (block + maxBlocks) < endingBlock, but that could overflow. The | |
2106 | // comparison below yields identical results, but without overflow. | |
2107 | if (block < (endingBlock-maxBlocks)) { | |
2108 | endingBlock = block + maxBlocks; // if we get this far, we've found enough | |
2109 | } | |
b4c24cb9 | 2110 | |
39236c6e A |
2111 | /* |
2112 | * While loop 2: | |
2113 | * Scan the bitmap, starting at 'currentWord' in the current | |
2114 | * bitmap block. Continue iterating through the bitmap until | |
2115 | * either we hit an allocated block, or until we have accumulated
2116 | * maxBlocks worth of bitmap. | |
2117 | */ | |
2118 | ||
2119 | /* Continue until we see an allocated block */ | |
2120 | while ((currentWord & bitMask) == 0) { | |
1c79356b A |
2121 | // Move to the next block. If no more, then exit. |
2122 | ++block; | |
39236c6e | 2123 | if (block == endingBlock) { |
1c79356b | 2124 | break; |
39236c6e | 2125 | } |
1c79356b A |
2126 | |
2127 | // Next bit | |
2128 | bitMask >>= 1; | |
2129 | if (bitMask == 0) { | |
1c79356b A |
2130 | // Next word |
2131 | bitMask = kHighBitInWordMask; | |
2132 | ++buffer; | |
39236c6e | 2133 | |
1c79356b A |
2134 | if (--wordsLeft == 0) { |
2135 | // Next block | |
55e303ae | 2136 | buffer = currCache = NULL; |
39236c6e A |
2137 | |
2138 | /* We're only reading the bitmap here, so mark it as clean */ | |
2139 | err = ReleaseBitmapBlock(vcb, blockRef, false); | |
2140 | if (err != noErr) { | |
2141 | goto Exit; | |
2142 | } | |
0b4e3aa0 | 2143 | |
55e303ae A |
2144 | /* |
2145 | * Skip over metadata blocks. | |
2146 | */ | |
2147 | if (!useMetaZone) { | |
2d21ac55 | 2148 | u_int32_t nextBlock; |
55e303ae A |
2149 | nextBlock = NextBitmapBlock(vcb, block); |
2150 | if (nextBlock != block) { | |
2151 | goto Exit; /* allocation gap, so stop */ | |
2152 | } | |
2153 | } | |
2154 | ||
39236c6e A |
2155 | if (block >= endingBlock) { |
2156 | goto Exit; | |
2157 | } | |
1c79356b | 2158 | |
39236c6e A |
2159 | err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); |
2160 | if (err != noErr) { | |
2161 | goto Exit; | |
b4c24cb9 | 2162 | } |
39236c6e | 2163 | buffer = currCache; |
0b4e3aa0 | 2164 | wordsLeft = wordsPerBlock; |
1c79356b | 2165 | } |
1c79356b A |
2166 | currentWord = SWAP_BE32 (*buffer); |
2167 | } | |
2168 | } | |
1c79356b A |
2169 | |
2170 | Exit: | |
39236c6e A |
2171 | if (currCache) { |
2172 | /* Release the bitmap reference prior to marking bits in-use */ | |
2173 | (void) ReleaseBitmapBlock(vcb, blockRef, false); | |
2174 | currCache = NULL; | |
2175 | } | |
2176 | ||
1c79356b A |
2177 | if (err == noErr) { |
2178 | *actualNumBlocks = block - *actualStartBlock; | |
39236c6e | 2179 | |
060df5ea A |
2180 | // sanity check |
2181 | if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { | |
2182 | panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); | |
2183 | } | |
39236c6e A |
2184 | |
2185 | /* Mark the bits found as in-use */ | |
2186 | err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks); | |
1c79356b A |
2187 | } |
2188 | else { | |
2189 | *actualStartBlock = 0; | |
2190 | *actualNumBlocks = 0; | |
2191 | } | |
316670eb | 2192 | |
6d2010ae A |
2193 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
2194 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); | |
2195 | ||
1c79356b A |
2196 | return err; |
2197 | } | |
2198 | ||
2199 | ||
0b4e3aa0 A |
2200 | /* |
2201 | _______________________________________________________________________ | |
2202 | ||
2203 | Routine: BlockAllocateKnown | |
2204 | ||
2205 | Function: Try to allocate space from known free space in the free | |
2206 | extent cache. | |
2207 | ||
2208 | Inputs: | |
2209 | vcb Pointer to volume where space is to be allocated | |
2210 | maxBlocks Maximum number of contiguous blocks to allocate | |
2211 | ||
2212 | Outputs: | |
2213 | actualStartBlock First block of range allocated, or 0 if error | |
2214 | actualNumBlocks Number of blocks allocated, or 0 if error | |
2215 | ||
2216 | Returns: | |
2217 | dskFulErr Free extent cache is empty | |
2218 | _______________________________________________________________________ | |
2219 | */ | |
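/*
 * Editor's note (informal sketch, not from the original source): vcbFreeExt[] is
 * the in-memory free extent cache; on non-sparse volumes it is believed to be kept
 * sorted with the largest known free run first, which is why the code below simply
 * takes up to maxBlocks from vcbFreeExt[0].  For example, with maxBlocks = 100 and
 * vcbFreeExt[0] = { startBlock 5000, blockCount 240 }, the call would return blocks
 * 5000..5099 and remove_free_extent_cache() would trim the cached entry accordingly.
 */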
b0d623f7 | 2220 | |
0b4e3aa0 | 2221 | static OSErr BlockAllocateKnown( |
39236c6e A |
2222 | ExtendedVCB *vcb, |
2223 | u_int32_t maxBlocks, | |
2224 | u_int32_t *actualStartBlock, | |
2225 | u_int32_t *actualNumBlocks) | |
0b4e3aa0 | 2226 | { |
2d21ac55 | 2227 | OSErr err; |
2d21ac55 | 2228 | u_int32_t foundBlocks; |
39236c6e | 2229 | struct hfsmount *hfsmp = VCBTOHFS(vcb); |
b0d623f7 | 2230 | |
6d2010ae A |
2231 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
2232 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0); | |
2233 | ||
39236c6e | 2234 | hfs_lock_mount (hfsmp); |
6d2010ae | 2235 | lck_spin_lock(&vcb->vcbFreeExtLock); |
39236c6e A |
2236 | if ( vcb->vcbFreeExtCnt == 0 || |
2237 | vcb->vcbFreeExt[0].blockCount == 0) { | |
6d2010ae | 2238 | lck_spin_unlock(&vcb->vcbFreeExtLock); |
39236c6e | 2239 | hfs_unlock_mount(hfsmp); |
6d2010ae A |
2240 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
2241 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0); | |
0b4e3aa0 | 2242 | return dskFulErr; |
d1ecb069 | 2243 | } |
6d2010ae | 2244 | lck_spin_unlock(&vcb->vcbFreeExtLock); |
39236c6e | 2245 | hfs_unlock_mount(hfsmp); |
0b4e3aa0 | 2246 | |
6d2010ae A |
2247 | lck_spin_lock(&vcb->vcbFreeExtLock); |
2248 | ||
0b4e3aa0 A |
2249 | // Just grab up to maxBlocks of the first (largest) free extent.
2250 | *actualStartBlock = vcb->vcbFreeExt[0].startBlock; | |
2251 | foundBlocks = vcb->vcbFreeExt[0].blockCount; | |
2252 | if (foundBlocks > maxBlocks) | |
2253 | foundBlocks = maxBlocks; | |
2254 | *actualNumBlocks = foundBlocks; | |
39236c6e | 2255 | |
316670eb | 2256 | lck_spin_unlock(&vcb->vcbFreeExtLock); |
b0d623f7 | 2257 | |
316670eb | 2258 | remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks); |
39236c6e | 2259 | |
55e303ae | 2260 | // sanity check |
2d21ac55 | 2261 | if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) |
0c530ab8 | 2262 | { |
2d21ac55 | 2263 | printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN); |
fe8ab488 | 2264 | hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); |
6601e61a A |
2265 | *actualStartBlock = 0; |
2266 | *actualNumBlocks = 0; | |
2267 | err = EIO; | |
2d21ac55 A |
2268 | } |
2269 | else | |
6601e61a A |
2270 | { |
2271 | // | |
2272 | // Now mark the found extent in the bitmap | |
2273 | // | |
6d2010ae | 2274 | err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks); |
6601e61a | 2275 | } |
2d21ac55 | 2276 | |
6d2010ae A |
2277 | sanity_check_free_ext(vcb, 0); |
2278 | ||
2279 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) | |
2280 | KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); | |
b0d623f7 | 2281 | |
2d21ac55 | 2282 | return err; |
0b4e3aa0 A |
2283 | } |
2284 | ||
6d2010ae A |
2285 | /* |
2286 | * BlockMarkAllocated | |
2287 | * | |
2288 | * This is a wrapper function around the internal calls which will actually mark the blocks | |
2289 | * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do | |
2290 | * this logic here to avoid callers having to deal with whether or not the red-black tree | |
2291 | * is enabled. | |
2292 | */ | |
2293 | ||
6d2010ae | 2294 | OSErr BlockMarkAllocated( |
39236c6e A |
2295 | ExtendedVCB *vcb, |
2296 | u_int32_t startingBlock, | |
2297 | register u_int32_t numBlocks) | |
6d2010ae A |
2298 | { |
2299 | struct hfsmount *hfsmp; | |
6d2010ae | 2300 | |
39236c6e | 2301 | hfsmp = VCBTOHFS(vcb); |
6d2010ae A |
2302 | |
2303 | return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks); | |
39236c6e | 2304 | |
6d2010ae A |
2305 | } |
2306 | ||
0b4e3aa0 | 2307 | |
1c79356b A |
2308 | |
2309 | /* | |
2310 | _______________________________________________________________________ | |
2311 | ||
6d2010ae | 2312 | Routine: BlockMarkAllocatedInternal |
1c79356b A |
2313 | |
2314 | Function: Mark a contiguous group of blocks as allocated (set in the | |
2315 | bitmap). It assumes those bits are currently marked | |
6d2010ae A |
2316 | deallocated (clear in the bitmap). Note that this function |
2317 | must be called regardless of whether or not the bitmap or | |
2318 | tree-based allocator is used, as all allocations must correctly | |
2319 | be marked on-disk. If the tree-based approach is running, then | |
2320 | this will be done before the node is removed from the tree. | |
1c79356b A |
2321 | |
2322 | Inputs: | |
2323 | vcb Pointer to volume where space is to be allocated | |
2324 | startingBlock First block number to mark as allocated | |
2325 | numBlocks Number of blocks to mark as allocated | |
2326 | _______________________________________________________________________ | |
2327 | */ | |
6d2010ae A |
2328 | static |
2329 | OSErr BlockMarkAllocatedInternal ( | |
39236c6e A |
2330 | ExtendedVCB *vcb, |
2331 | u_int32_t startingBlock, | |
2332 | register u_int32_t numBlocks) | |
1c79356b A |
2333 | { |
2334 | OSErr err; | |
2d21ac55 A |
2335 | register u_int32_t *currentWord; // Pointer to current word within bitmap block |
2336 | register u_int32_t wordsLeft; // Number of words left in this bitmap block | |
2337 | register u_int32_t bitMask; // Word with given bits already set (ready to OR in) | |
2338 | u_int32_t firstBit; // Bit index within word of first bit to allocate | |
2339 | u_int32_t numBits; // Number of bits in word to allocate | |
2340 | u_int32_t *buffer = NULL; | |
b0d623f7 | 2341 | uintptr_t blockRef; |
2d21ac55 A |
2342 | u_int32_t bitsPerBlock; |
2343 | u_int32_t wordsPerBlock; | |
b4c24cb9 A |
2344 | // XXXdbg |
2345 | struct hfsmount *hfsmp = VCBTOHFS(vcb); | |
1c79356b | 2346 | |
6d2010ae A |
2347 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
2348 | KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); | |
2349 | ||
39236c6e A |
2350 | int force_flush = 0; |
2351 | /* | |
2352 | * Since we are about to mark these bits as in-use | |
2353 | * in the bitmap, decide if we need to alert the caller | |
2354 | * that a journal flush might be appropriate. It's safe to | |
2355 | * poke at the journal pointer here since we MUST have | |
2356 | * called start_transaction by the time this function is invoked. | |
2357 | * If the journal is enabled, then it will have taken the requisite | |
2358 | * journal locks. If it is not enabled, then we have taken | |
2359 | * a shared lock on the global lock. | |
2360 | */ | |
2361 | if (hfsmp->jnl) { | |
2362 | uint32_t ignore; | |
2363 | err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore); | |
2364 | if ((err == 0) && (force_flush)) { | |
2365 | journal_request_immediate_flush (hfsmp->jnl); | |
2366 | } | |
2367 | } | |
2368 | ||
6d2010ae | 2369 | hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks); |
39236c6e | 2370 | |
1c79356b A |
2371 | // |
2372 | // Pre-read the bitmap block containing the first word of allocation | |
2373 | // | |
2374 | ||
0b4e3aa0 | 2375 | err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); |
1c79356b | 2376 | if (err != noErr) goto Exit; |
1c79356b A |
2377 | // |
2378 | // Initialize currentWord, and wordsLeft. | |
2379 | // | |
2380 | { | |
2d21ac55 | 2381 | u_int32_t wordIndexInBlock; |
39236c6e | 2382 | |
0b4e3aa0 A |
2383 | bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; |
2384 | wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; | |
1c79356b | 2385 | |
0b4e3aa0 | 2386 | wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; |
1c79356b | 2387 | currentWord = buffer + wordIndexInBlock; |
0b4e3aa0 | 2388 | wordsLeft = wordsPerBlock - wordIndexInBlock; |
1c79356b | 2389 | } |
39236c6e | 2390 | |
b4c24cb9 A |
2391 | // XXXdbg |
2392 | if (hfsmp->jnl) { | |
2393 | journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); | |
2394 | } | |
2395 | ||
1c79356b A |
2396 | // |
2397 | // If the first block to allocate doesn't start on a word | |
2398 | // boundary in the bitmap, then treat that first word | |
2399 | // specially. | |
2400 | // | |
2401 | ||
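	/*
	 * Editor's worked example (hypothetical numbers): for firstBit = 5 and
	 * numBlocks = 3, the math below yields
	 *
	 *	bitMask = 0xFFFFFFFF >> 5                   = 0x07FFFFFF
	 *	numBits = 32 - 5                            = 27  (> numBlocks, so clamp to 3)
	 *	bitMask &= ~(0xFFFFFFFF >> (5 + 3))         = 0x07FFFFFF & 0xFF000000 = 0x07000000
	 *
	 * i.e. only bits 5..7 of the word (counting from the most significant bit) get set.
	 */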
2402 | firstBit = startingBlock % kBitsPerWord; | |
2403 | if (firstBit != 0) { | |
2404 | bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit | |
2405 | numBits = kBitsPerWord - firstBit; // number of remaining bits in this word | |
2406 | if (numBits > numBlocks) { | |
2407 | numBits = numBlocks; // entire allocation is inside this one word | |
2408 | bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last | |
2409 | } | |
2410 | #if DEBUG_BUILD | |
2411 | if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { | |
6d2010ae | 2412 | panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); |
1c79356b A |
2413 | } |
2414 | #endif | |
2415 | *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap | |
2416 | numBlocks -= numBits; // adjust number of blocks left to allocate | |
2417 | ||
2418 | ++currentWord; // move to next word | |
2419 | --wordsLeft; // one less word left in this block | |
2420 | } | |
2421 | ||
2422 | // | |
2423 | // Allocate whole words (32 blocks) at a time. | |
2424 | // | |
2425 | ||
2426 | bitMask = kAllBitsSetInWord; // put this in a register for 68K | |
2427 | while (numBlocks >= kBitsPerWord) { | |
2428 | if (wordsLeft == 0) { | |
2429 | // Read in the next bitmap block | |
0b4e3aa0 | 2430 | startingBlock += bitsPerBlock; // generate a block number in the next bitmap block |
39236c6e | 2431 | |
1c79356b | 2432 | buffer = NULL; |
0b4e3aa0 | 2433 | err = ReleaseBitmapBlock(vcb, blockRef, true); |
1c79356b | 2434 | if (err != noErr) goto Exit; |
0b4e3aa0 A |
2435 | |
2436 | err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); | |
2437 | if (err != noErr) goto Exit; | |
2438 | ||
b4c24cb9 A |
2439 | // XXXdbg |
2440 | if (hfsmp->jnl) { | |
2441 | journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); | |
2442 | } | |
2443 | ||
1c79356b A |
2444 | // Readjust currentWord and wordsLeft |
2445 | currentWord = buffer; | |
0b4e3aa0 | 2446 | wordsLeft = wordsPerBlock; |
1c79356b A |
2447 | } |
2448 | #if DEBUG_BUILD | |
2449 | if (*currentWord != 0) { | |
6d2010ae | 2450 | panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); |
1c79356b A |
2451 | } |
2452 | #endif | |
2453 | *currentWord = SWAP_BE32 (bitMask); | |
2454 | numBlocks -= kBitsPerWord; | |
2455 | ||
2456 | ++currentWord; // move to next word | |
2457 | --wordsLeft; // one less word left in this block | |
2458 | } | |
39236c6e | 2459 | |
1c79356b A |
2460 | // |
2461 | // Allocate any remaining blocks. | |
2462 | // | |
39236c6e | 2463 | |
1c79356b A |
2464 | if (numBlocks != 0) { |
2465 | bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits | |
2466 | if (wordsLeft == 0) { | |
2467 | // Read in the next bitmap block | |
0b4e3aa0 | 2468 | startingBlock += bitsPerBlock; // generate a block number in the next bitmap block |
39236c6e | 2469 | |
1c79356b | 2470 | buffer = NULL; |
0b4e3aa0 | 2471 | err = ReleaseBitmapBlock(vcb, blockRef, true); |
1c79356b | 2472 | if (err != noErr) goto Exit; |
0b4e3aa0 A |
2473 | |
2474 | err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); | |
2475 | if (err != noErr) goto Exit; | |
2476 | ||
b4c24cb9 A |
2477 | // XXXdbg |
2478 | if (hfsmp->jnl) { | |
2479 | journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); | |
2480 | } | |
39236c6e | 2481 | |
1c79356b A |
2482 | // Readjust currentWord and wordsLeft |
2483 | currentWord = buffer; | |
0b4e3aa0 | 2484 | wordsLeft = wordsPerBlock; |
1c79356b A |
2485 | } |
2486 | #if DEBUG_BUILD | |
2487 | if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { | |
6d2010ae | 2488 | panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); |
1c79356b A |
2489 | } |
2490 | #endif | |
2491 | *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap | |
2492 | ||
2493 | // No need to update currentWord or wordsLeft | |
2494 | } | |
2495 | ||
2496 | Exit: | |
2497 | ||
0b4e3aa0 A |
2498 | if (buffer) |
2499 | (void)ReleaseBitmapBlock(vcb, blockRef, true); | |
1c79356b | 2500 | |
6d2010ae A |
2501 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
2502 | KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); | |
2503 | ||
1c79356b A |
2504 | return err; |
2505 | } | |
2506 | ||
6d2010ae A |
2507 | |
2508 | /* | |
2509 | * BlockMarkFree | |
2510 | * | |
2511 | * This is a wrapper function around the internal calls which will actually mark the blocks | |
2512 | * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do | |
2513 | * this logic here to avoid callers having to deal with whether or not the red-black tree | |
2514 | * is enabled. | |
2515 | * | |
2516 | */ | |
2517 | OSErr BlockMarkFree( | |
39236c6e A |
2518 | ExtendedVCB *vcb, |
2519 | u_int32_t startingBlock, | |
2520 | register u_int32_t numBlocks) | |
6d2010ae A |
2521 | { |
2522 | struct hfsmount *hfsmp; | |
2523 | hfsmp = VCBTOHFS(vcb); | |
39236c6e | 2524 | |
6d2010ae | 2525 | return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true); |
6d2010ae A |
2526 | } |
2527 | ||
2528 | ||
2529 | /* | |
2530 | * BlockMarkFreeUnused | |
2531 | * | |
2532 | * Scan the bitmap blocks beyond the end of the current file system for bits
2533 | * that are marked as used. If any of the bits are marked as used, | |
2534 | * this function marks them free. | |
2535 | * | |
2536 | * Note: This was specifically written to mark all bits beyond | |
2537 | * end of current file system during hfs_extendfs(), which makes | |
2538 | * sure that all the new blocks added to the file system are | |
2539 | * marked as free. We expect that all the blocks beyond end of | |
2540 | * current file system are always marked as free, but there might | |
2541 | * be cases where they are marked as used. This function assumes that
2542 | * the number of blocks marked as used incorrectly are relatively | |
2543 | * small, otherwise this can overflow journal transaction size | |
2544 | * on certain file system configurations (example, large unused | |
2545 | * bitmap with relatively small journal). | |
2546 | * | |
2547 | * Input: | |
2548 | * startingBlock: First block of the range to mark unused | |
2549 | * numBlocks: Number of blocks in the range to mark unused | |
2550 | * | |
2551 | * Returns: zero on success, non-zero on error. | |
2552 | */ | |
2553 | OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks) | |
2554 | { | |
2555 | int error = 0; | |
2556 | struct hfsmount *hfsmp = VCBTOHFS(vcb); | |
2557 | u_int32_t curNumBlocks; | |
2558 | u_int32_t bitsPerBlock; | |
2559 | u_int32_t lastBit; | |
2560 | ||
2561 | /* Use the optimal bitmap I/O size instead of bitmap block size */ | |
2562 | bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; | |
2563 | ||
2564 | /* | |
2565 | * First clear any bits that are not aligned to a bitmap block boundary
2566 | * | |
2567 | * Calculate the first bit in the bitmap block next to | |
2568 | * the bitmap block containing the bit for startingBlock. | |
2569 | * Using this value, we calculate the total number of | |
2570 | * bits to be marked unused from startingBlock to the | |
2571 | * end of bitmap block containing startingBlock. | |
2572 | */ | |
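	/*
	 * Editor's worked example (hypothetical numbers): with bitsPerBlock = 32768,
	 * startingBlock = 40000 and numBlocks = 100000:
	 *
	 *	lastBit      = ((40000 + 32767) / 32768) * 32768 = 65536
	 *	curNumBlocks = 65536 - 40000                      = 25536
	 *
	 * so the first call below only covers the partial range up to the next bitmap
	 * block boundary; the loop further down handles the remaining full blocks.
	 */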
2573 | lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock; | |
2574 | curNumBlocks = lastBit - startingBlock; | |
2575 | if (curNumBlocks > numBlocks) { | |
2576 | curNumBlocks = numBlocks; | |
2577 | } | |
2578 | error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false); | |
2579 | if (error) { | |
2580 | return error; | |
2581 | } | |
2582 | startingBlock += curNumBlocks; | |
2583 | numBlocks -= curNumBlocks; | |
2584 | ||
2585 | /* | |
2586 | * Check a full bitmap block for any 'used' bit. If any bit is used, | |
2587 | * mark all the bits only in that bitmap block as free. This ensures | |
2588 | * that we do not write unmodified bitmap blocks and do not | |
2589 | * overwhelm the journal. | |
2590 | * | |
2591 | * The code starts by checking a full bitmap block at a time, and
2592 | * marks entire bitmap block as free only if any bit in that bitmap | |
2593 | * block is marked as used. In the end, it handles the last bitmap | |
2594 | * block which might be partially full by only checking till the | |
2595 | * caller-specified last bit and if any bit is set, only mark that | |
2596 | * range as free. | |
2597 | */ | |
2598 | while (numBlocks) { | |
2599 | if (numBlocks >= bitsPerBlock) { | |
2600 | curNumBlocks = bitsPerBlock; | |
2601 | } else { | |
2602 | curNumBlocks = numBlocks; | |
2603 | } | |
2604 | if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) { | |
2605 | error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false); | |
2606 | if (error) { | |
2607 | return error; | |
2608 | } | |
2609 | } | |
2610 | startingBlock += curNumBlocks; | |
2611 | numBlocks -= curNumBlocks; | |
2612 | } | |
2613 | ||
2614 | return error; | |
2615 | } | |
2616 | ||
2617 | /* | |
2618 | _______________________________________________________________________ | |
2619 | ||
2620 | Routine: BlockMarkFreeInternal | |
2621 | ||
2622 | Function: Mark a contiguous group of blocks as free (clear in the | |
2623 | bitmap). It assumes those bits are currently marked | |
2624 | allocated (set in the bitmap). | |
2625 | ||
2626 | Inputs: | |
2627 | vcb Pointer to volume where space is to be freed | |
2628 | startingBlock First block number to mark as freed | |
2629 | numBlocks Number of blocks to mark as freed | |
2630 | do_validate If true, validate the blocks being | |
2631 | deallocated: check that they are within totalBlocks | |
2632 | for the current volume and that they were allocated | |
2633 | before they are marked free. | |
2634 | _______________________________________________________________________ | |
2635 | */ | |
2636 | static | |
2637 | OSErr BlockMarkFreeInternal( | |
39236c6e A |
2638 | ExtendedVCB *vcb, |
2639 | u_int32_t startingBlock_in, | |
2640 | register u_int32_t numBlocks_in, | |
2641 | Boolean do_validate) | |
6d2010ae | 2642 | { |
316670eb | 2643 | OSErr err; |
6d2010ae A |
2644 | u_int32_t startingBlock = startingBlock_in; |
2645 | u_int32_t numBlocks = numBlocks_in; | |
316670eb A |
2646 | uint32_t unmapStart = startingBlock_in; |
2647 | uint32_t unmapCount = numBlocks_in; | |
2648 | uint32_t wordIndexInBlock; | |
2649 | u_int32_t *currentWord; // Pointer to current word within bitmap block | |
2650 | u_int32_t wordsLeft; // Number of words left in this bitmap block | |
2651 | u_int32_t bitMask; // Word with given bits already set (ready to OR in) | |
2652 | u_int32_t currentBit; // Bit index within word of current bit to allocate | |
2653 | u_int32_t numBits; // Number of bits in word to allocate | |
2654 | u_int32_t *buffer = NULL; | |
2655 | uintptr_t blockRef; | |
2656 | u_int32_t bitsPerBlock; | |
2657 | u_int32_t wordsPerBlock; | |
39236c6e | 2658 | // XXXdbg |
6d2010ae | 2659 | struct hfsmount *hfsmp = VCBTOHFS(vcb); |
39236c6e | 2660 | |
6d2010ae A |
2661 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
2662 | KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0); | |
2663 | ||
2664 | /* | |
2665 | * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we | |
2666 | * need to be able to free blocks being relocated during hfs_truncatefs. | |
2667 | */ | |
2668 | if ((do_validate == true) && | |
39236c6e | 2669 | (startingBlock + numBlocks > vcb->totalBlocks)) { |
6d2010ae A |
2670 | if (ALLOC_DEBUG) { |
2671 | panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN); | |
2672 | } | |
39236c6e | 2673 | |
6d2010ae | 2674 | printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN); |
fe8ab488 | 2675 | hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); |
6d2010ae A |
2676 | err = EIO; |
2677 | goto Exit; | |
2678 | } | |
39236c6e | 2679 | |
6d2010ae A |
2680 | // |
2681 | // Pre-read the bitmap block containing the first word of allocation | |
2682 | // | |
39236c6e | 2683 | |
6d2010ae A |
2684 | err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); |
2685 | if (err != noErr) goto Exit; | |
2686 | // XXXdbg | |
2687 | if (hfsmp->jnl) { | |
2688 | journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); | |
2689 | } | |
39236c6e | 2690 | |
6d2010ae | 2691 | // |
316670eb | 2692 | // Figure out how many bits and words per bitmap block. |
6d2010ae | 2693 | // |
316670eb A |
2694 | bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; |
2695 | wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; | |
2696 | wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; | |
39236c6e | 2697 | |
316670eb A |
2698 | // |
2699 | // Look for a range of free blocks immediately before startingBlock | |
2700 | // (up to the start of the current bitmap block). Set unmapStart to | |
2701 | // the first free block. | |
2702 | // | |
2703 | currentWord = buffer + wordIndexInBlock; | |
2704 | currentBit = startingBlock % kBitsPerWord; | |
2705 | bitMask = kHighBitInWordMask >> currentBit; | |
2706 | while (true) { | |
2707 | // Move currentWord/bitMask back by one bit | |
2708 | bitMask <<= 1; | |
2709 | if (bitMask == 0) { | |
2710 | if (--currentWord < buffer) | |
2711 | break; | |
2712 | bitMask = kLowBitInWordMask; | |
2713 | } | |
39236c6e | 2714 | |
316670eb A |
2715 | if (*currentWord & SWAP_BE32(bitMask)) |
2716 | break; // Found an allocated block. Stop searching. | |
2717 | --unmapStart; | |
2718 | ++unmapCount; | |
1c79356b | 2719 | } |
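/*
 * Example with made-up numbers: if startingBlock_in is 200 and blocks
 * 196..199 are already clear in this bitmap block, the loop above walks
 * unmapStart back to 196 and grows unmapCount by 4, so the eventual
 * hfs_unmap_free_extent() call below covers one larger, coalesced
 * extent for the device to TRIM.
 */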
39236c6e | 2720 | |
1c79356b A |
2721 | // |
2722 | // If the first block to free doesn't start on a word | |
2723 | // boundary in the bitmap, then treat that first word | |
2724 | // specially. | |
2725 | // | |
39236c6e | 2726 | |
316670eb A |
2727 | currentWord = buffer + wordIndexInBlock; |
2728 | wordsLeft = wordsPerBlock - wordIndexInBlock; | |
2729 | currentBit = startingBlock % kBitsPerWord; | |
2730 | if (currentBit != 0) { | |
2731 | bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit | |
2732 | numBits = kBitsPerWord - currentBit; // number of remaining bits in this word | |
1c79356b A |
2733 | if (numBits > numBlocks) { |
2734 | numBits = numBlocks; // entire allocation is inside this one word | |
316670eb | 2735 | bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last |
1c79356b | 2736 | } |
6d2010ae | 2737 | if ((do_validate == true) && |
39236c6e | 2738 | (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { |
55e303ae | 2739 | goto Corruption; |
1c79356b | 2740 | } |
1c79356b A |
2741 | *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap |
2742 | numBlocks -= numBits; // adjust number of blocks left to free | |
39236c6e | 2743 | |
1c79356b A |
2744 | ++currentWord; // move to next word |
2745 | --wordsLeft; // one less word left in this block | |
2746 | } | |
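/*
 * Worked example (hypothetical values): if currentBit is 5 and only
 * numBlocks = 10 blocks remain, bitMask becomes
 * (0xFFFFFFFF >> 5) & ~(0xFFFFFFFF >> 15) == 0x07FE0000; those are the
 * ten bits starting at bit 5 from the most significant end, which the
 * code above just cleared from the big-endian bitmap word.
 */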
39236c6e | 2747 | |
1c79356b | 2748 | // |
0b4e3aa0 | 2749 | // Free whole words (32 blocks) at a time. |
1c79356b | 2750 | // |
39236c6e | 2751 | |
1c79356b A |
2752 | while (numBlocks >= kBitsPerWord) { |
2753 | if (wordsLeft == 0) { | |
2754 | // Read in the next bitmap block | |
0b4e3aa0 | 2755 | startingBlock += bitsPerBlock; // generate a block number in the next bitmap block |
39236c6e | 2756 | |
1c79356b | 2757 | buffer = NULL; |
0b4e3aa0 | 2758 | err = ReleaseBitmapBlock(vcb, blockRef, true); |
1c79356b | 2759 | if (err != noErr) goto Exit; |
39236c6e | 2760 | |
0b4e3aa0 A |
2761 | err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); |
2762 | if (err != noErr) goto Exit; | |
39236c6e | 2763 | |
b4c24cb9 A |
2764 | // XXXdbg |
2765 | if (hfsmp->jnl) { | |
2766 | journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); | |
2767 | } | |
39236c6e | 2768 | |
1c79356b A |
2769 | // Readjust currentWord and wordsLeft |
2770 | currentWord = buffer; | |
0b4e3aa0 | 2771 | wordsLeft = wordsPerBlock; |
1c79356b | 2772 | } |
6d2010ae | 2773 | if ((do_validate == true) && |
39236c6e | 2774 | (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) { |
55e303ae | 2775 | goto Corruption; |
1c79356b | 2776 | } |
1c79356b A |
2777 | *currentWord = 0; // clear the entire word |
2778 | numBlocks -= kBitsPerWord; | |
39236c6e | 2779 | |
1c79356b | 2780 | ++currentWord; // move to next word |
316670eb | 2781 | --wordsLeft; // one less word left in this block |
1c79356b | 2782 | } |
39236c6e | 2783 | |
1c79356b | 2784 | // |
0b4e3aa0 | 2785 | // Free any remaining blocks. |
1c79356b | 2786 | // |
39236c6e | 2787 | |
1c79356b A |
2788 | if (numBlocks != 0) { |
2789 | bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits | |
2790 | if (wordsLeft == 0) { | |
2791 | // Read in the next bitmap block | |
0b4e3aa0 | 2792 | startingBlock += bitsPerBlock; // generate a block number in the next bitmap block |
39236c6e | 2793 | |
1c79356b | 2794 | buffer = NULL; |
0b4e3aa0 | 2795 | err = ReleaseBitmapBlock(vcb, blockRef, true); |
1c79356b | 2796 | if (err != noErr) goto Exit; |
39236c6e | 2797 | |
0b4e3aa0 A |
2798 | err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); |
2799 | if (err != noErr) goto Exit; | |
39236c6e | 2800 | |
b4c24cb9 A |
2801 | // XXXdbg |
2802 | if (hfsmp->jnl) { | |
2803 | journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); | |
2804 | } | |
39236c6e | 2805 | |
1c79356b A |
2806 | // Readjust currentWord and wordsLeft |
2807 | currentWord = buffer; | |
0b4e3aa0 | 2808 | wordsLeft = wordsPerBlock; |
1c79356b | 2809 | } |
6d2010ae | 2810 | if ((do_validate == true) && |
39236c6e | 2811 | (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { |
55e303ae | 2812 | goto Corruption; |
1c79356b | 2813 | } |
1c79356b | 2814 | *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap |
39236c6e | 2815 | |
1c79356b A |
2816 | // No need to update currentWord or wordsLeft |
2817 | } | |
39236c6e | 2818 | |
316670eb A |
2819 | // |
2820 | // Look for a range of free blocks immediately after the range we just freed | |
2821 | // (up to the end of the current bitmap block). | |
2822 | // | |
2823 | wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord; | |
2824 | wordsLeft = wordsPerBlock - wordIndexInBlock; | |
2825 | currentWord = buffer + wordIndexInBlock; | |
2826 | currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord; | |
2827 | bitMask = kHighBitInWordMask >> currentBit; | |
2828 | while (true) { | |
2829 | // Move currentWord/bitMask/wordsLeft forward one bit | |
2830 | bitMask >>= 1; | |
2831 | if (bitMask == 0) { | |
2832 | if (--wordsLeft == 0) | |
2833 | break; | |
2834 | ++currentWord; | |
2835 | bitMask = kHighBitInWordMask; | |
2836 | } | |
39236c6e | 2837 | |
316670eb A |
2838 | if (*currentWord & SWAP_BE32(bitMask)) |
2839 | break; // Found an allocated block. Stop searching. | |
2840 | ++unmapCount; | |
2841 | } | |
39236c6e | 2842 | |
1c79356b | 2843 | Exit: |
39236c6e | 2844 | |
0b4e3aa0 A |
2845 | if (buffer) |
2846 | (void)ReleaseBitmapBlock(vcb, blockRef, true); | |
39236c6e | 2847 | |
6d2010ae | 2848 | if (err == noErr) { |
316670eb | 2849 | hfs_unmap_free_extent(vcb, unmapStart, unmapCount); |
593a1d5f A |
2850 | } |
2851 | ||
6d2010ae A |
2852 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) |
2853 | KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); | |
593a1d5f | 2854 | |
1c79356b | 2855 | return err; |
39236c6e | 2856 | |
55e303ae A |
2857 | Corruption: |
2858 | #if DEBUG_BUILD | |
6d2010ae | 2859 | panic("hfs: BlockMarkFreeInternal: blocks not allocated!"); |
55e303ae | 2860 | #else |
316670eb | 2861 | printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s\n", vcb->vcbVN); |
fe8ab488 | 2862 | hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); |
55e303ae A |
2863 | err = EIO; |
2864 | goto Exit; | |
2865 | #endif | |
1c79356b A |
2866 | } |
2867 | ||
316670eb | 2868 | |
1c79356b A |
2869 | /* |
2870 | _______________________________________________________________________ | |
2871 | ||
2872 | Routine: BlockFindContiguous | |
2873 | ||
2874 | Function: Find a contiguous range of blocks that are free (bits | |
2875 | clear in the bitmap). If a contiguous range of the | |
2876 | minimum size can't be found, an error will be returned. | |
6d2010ae A |
2877 | This is only needed to support the bitmap-scanning logic, |
2878 | as the red-black tree should be able to do this by internally | |
2879 | searching its tree. | |
1c79356b A |
2880 | |
2881 | Inputs: | |
2882 | vcb Pointer to volume where space is to be allocated | |
2883 | startingBlock Preferred first block of range | |
2884 | endingBlock Last possible block in range + 1 | |
2885 | minBlocks Minimum number of blocks needed. Must be > 0. | |
2886 | maxBlocks Maximum (ideal) number of blocks desired | |
55e303ae | 2887 | useMetaZone OK to dip into metadata allocation zone |
1c79356b A |
2888 | |
2889 | Outputs: | |
2890 | actualStartBlock First block of range found, or 0 if error | |
2891 | actualNumBlocks Number of blocks found, or 0 if error | |
1c79356b | 2892 | |
0b4e3aa0 A |
2893 | Returns: |
2894 | noErr Found at least minBlocks contiguous | |
2895 | dskFulErr No contiguous space found, or all less than minBlocks | |
2896 | _______________________________________________________________________ | |
1c79356b A |
2897 | */ |
2898 | ||
2899 | static OSErr BlockFindContiguous( | |
39236c6e A |
2900 | ExtendedVCB *vcb, |
2901 | u_int32_t startingBlock, | |
2902 | u_int32_t endingBlock, | |
2903 | u_int32_t minBlocks, | |
2904 | u_int32_t maxBlocks, | |
2905 | Boolean useMetaZone, | |
2906 | Boolean trustSummary, | |
2907 | u_int32_t *actualStartBlock, | |
2908 | u_int32_t *actualNumBlocks) | |
1c79356b A |
2909 | { |
2910 | OSErr err; | |
2d21ac55 A |
2911 | register u_int32_t currentBlock; // Block we're currently looking at. |
2912 | u_int32_t firstBlock; // First free block in current extent. | |
2913 | u_int32_t stopBlock; // If we get to this block, stop searching for first free block. | |
2914 | u_int32_t foundBlocks; // Number of contiguous free blocks in current extent. | |
2915 | u_int32_t *buffer = NULL; | |
2916 | register u_int32_t *currentWord; | |
2917 | register u_int32_t bitMask; | |
2918 | register u_int32_t wordsLeft; | |
2919 | register u_int32_t tempWord; | |
b0d623f7 | 2920 | uintptr_t blockRef; |
2d21ac55 | 2921 | u_int32_t wordsPerBlock; |
6d2010ae | 2922 | u_int32_t updated_free_extent = 0; |
39236c6e | 2923 | struct hfsmount *hfsmp = (struct hfsmount*) vcb; |
6d2010ae A |
2924 | |
2925 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) | |
2926 | KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0); | |
1c79356b | 2927 | |
743b1565 A |
2928 | /* |
2929 | * When we're skipping the metadata zone and the start/end | |
2930 | * range overlaps with the metadata zone then adjust the | |
2931 | * start to be outside of the metadata zone. If the range | |
2932 | * is entirely inside the metadata zone then we can deny the | |
2933 | * request (dskFulErr). | |
2934 | */ | |
2935 | if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) { | |
2936 | if (startingBlock <= vcb->hfs_metazone_end) { | |
2937 | if (endingBlock > (vcb->hfs_metazone_end + 2)) | |
2938 | startingBlock = vcb->hfs_metazone_end + 1; | |
2939 | else | |
2940 | goto DiskFull; | |
2941 | } | |
4a249263 A |
2942 | } |
2943 | ||
0b4e3aa0 A |
2944 | if ((endingBlock - startingBlock) < minBlocks) |
2945 | { | |
1c79356b A |
2946 | // The set of blocks we're checking is smaller than the minimum number |
2947 | // of blocks, so we couldn't possibly find a good range. | |
0b4e3aa0 | 2948 | goto DiskFull; |
1c79356b | 2949 | } |
0b4e3aa0 A |
2950 | |
2951 | stopBlock = endingBlock - minBlocks + 1; | |
2952 | currentBlock = startingBlock; | |
9bccf70c | 2953 | firstBlock = 0; |
55e303ae A |
2954 | |
2955 | /* | |
2956 | * Skip over metadata blocks. | |
2957 | */ | |
2958 | if (!useMetaZone) | |
2959 | currentBlock = NextBitmapBlock(vcb, currentBlock); | |
2960 | ||
39236c6e A |
2961 | /* |
2962 | * Use the summary table if we can. Skip over any totally | |
2963 | * allocated bitmap regions. currentBlock should now point to the first | |
2964 | * block beyond the metadata zone if metazone allocations are not | |
2965 | * allowed in this invocation. | |
2966 | */ | |
2967 | if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { | |
2968 | uint32_t suggestion; | |
2969 | if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) { | |
2970 | currentBlock = suggestion; | |
2971 | } | |
2972 | } | |
2973 | ||
2974 | ||
1c79356b | 2975 | // |
0b4e3aa0 | 2976 | // Pre-read the first bitmap block. |
1c79356b | 2977 | // |
0b4e3aa0 A |
2978 | err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); |
2979 | if ( err != noErr ) goto ErrorExit; | |
1c79356b | 2980 | |
1c79356b | 2981 | // |
0b4e3aa0 | 2982 | // Figure out where currentBlock is within the buffer. |
1c79356b | 2983 | // |
0b4e3aa0 | 2984 | wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; |
1c79356b | 2985 | |
55e303ae | 2986 | wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer |
0b4e3aa0 A |
2987 | currentWord = buffer + wordsLeft; |
2988 | wordsLeft = wordsPerBlock - wordsLeft; | |
39236c6e A |
2989 | |
2990 | /* | |
2991 | * This outer do-while loop is the main body of this function. Its job is | |
2992 | * to search through the blocks (until we hit 'stopBlock'), and iterate | |
2993 | * through swaths of allocated bitmap until it finds free regions. | |
2994 | */ | |
2995 | ||
0b4e3aa0 | 2996 | do |
1c79356b | 2997 | { |
0b4e3aa0 | 2998 | foundBlocks = 0; |
39236c6e A |
2999 | uint32_t summary_block_scan = 0; |
3000 | /* | |
3001 | * Inner while loop 1: | |
3002 | * Look for free blocks, skipping over allocated ones. | |
3003 | * | |
3004 | * Initialization starts with checking the initial partial word | |
3005 | * if applicable. | |
3006 | */ | |
0b4e3aa0 A |
3007 | bitMask = currentBlock & kBitsWithinWordMask; |
3008 | if (bitMask) | |
3009 | { | |
9bccf70c | 3010 | tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once |
0b4e3aa0 A |
3011 | bitMask = kHighBitInWordMask >> bitMask; |
3012 | while (tempWord & bitMask) | |
1c79356b | 3013 | { |
0b4e3aa0 A |
3014 | bitMask >>= 1; |
3015 | ++currentBlock; | |
1c79356b | 3016 | } |
1c79356b | 3017 | |
0b4e3aa0 A |
3018 | // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)? |
3019 | if (bitMask) | |
3020 | goto FoundUnused; | |
1c79356b | 3021 | |
0b4e3aa0 A |
3022 | // Didn't find any unused bits, so we're done with this word. |
3023 | ++currentWord; | |
3024 | --wordsLeft; | |
1c79356b | 3025 | } |
1c79356b | 3026 | |
0b4e3aa0 A |
3027 | // |
3028 | // Check whole words | |
3029 | // | |
3030 | while (currentBlock < stopBlock) | |
3031 | { | |
3032 | // See if it's time to read another block. | |
3033 | if (wordsLeft == 0) | |
1c79356b | 3034 | { |
1c79356b | 3035 | buffer = NULL; |
39236c6e A |
3036 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { |
3037 | /* | |
3038 | * If summary_block_scan is non-zero, then we must have | |
3039 | * pulled a bitmap file block into core, and scanned through | |
3040 | * the entire thing. Because we're in this loop, we are | |
3041 | * implicitly trusting that the bitmap didn't have any knowledge | |
3042 | * about this particular block. As a result, update the bitmap | |
3043 | * (lazily, now that we've scanned it) with our findings that | |
3044 | * this particular block is completely used up. | |
3045 | */ | |
3046 | if (summary_block_scan != 0) { | |
3047 | uint32_t summary_bit; | |
3048 | (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit); | |
3049 | hfs_set_summary (hfsmp, summary_bit, 1); | |
3050 | summary_block_scan = 0; | |
3051 | } | |
3052 | } | |
0b4e3aa0 A |
3053 | err = ReleaseBitmapBlock(vcb, blockRef, false); |
3054 | if (err != noErr) goto ErrorExit; | |
3055 | ||
55e303ae A |
3056 | /* |
3057 | * Skip over metadata blocks. | |
3058 | */ | |
3059 | if (!useMetaZone) { | |
3060 | currentBlock = NextBitmapBlock(vcb, currentBlock); | |
4a249263 A |
3061 | if (currentBlock >= stopBlock) { |
3062 | goto LoopExit; | |
3063 | } | |
55e303ae A |
3064 | } |
3065 | ||
39236c6e A |
3066 | /* Skip over fully allocated bitmap blocks if we can */ |
3067 | if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { | |
3068 | uint32_t suggestion; | |
3069 | if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) { | |
3070 | if (suggestion < stopBlock) { | |
3071 | currentBlock = suggestion; | |
3072 | } | |
3073 | } | |
3074 | } | |
3075 | ||
0b4e3aa0 A |
3076 | err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); |
3077 | if ( err != noErr ) goto ErrorExit; | |
39236c6e A |
3078 | |
3079 | /* | |
3080 | * Set summary_block_scan to be the block we just read into the block cache. | |
3081 | * | |
3082 | * At this point, we've just read an allocation block worth of bitmap file | |
3083 | * into the buffer above, but we don't know if it is completely allocated or not. | |
3084 | * If we find that it is completely allocated/full then we will jump | |
3085 | * through this loop again and set the appropriate summary bit as fully allocated. | |
3086 | */ | |
3087 | summary_block_scan = currentBlock; | |
0b4e3aa0 A |
3088 | currentWord = buffer; |
3089 | wordsLeft = wordsPerBlock; | |
1c79356b | 3090 | } |
39236c6e | 3091 | |
0b4e3aa0 | 3092 | // See if any of the bits are clear |
9bccf70c | 3093 | if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear |
1c79356b | 3094 | { |
0b4e3aa0 A |
3095 | // Figure out which bit is clear |
3096 | bitMask = kHighBitInWordMask; | |
3097 | while (tempWord & bitMask) | |
3098 | { | |
3099 | bitMask >>= 1; | |
3100 | ++currentBlock; | |
3101 | } | |
39236c6e | 3102 | |
0b4e3aa0 | 3103 | break; // Found the free bit; break out to FoundUnused. |
1c79356b | 3104 | } |
1c79356b | 3105 | |
0b4e3aa0 A |
3106 | // Keep looking at the next word |
3107 | currentBlock += kBitsPerWord; | |
3108 | ++currentWord; | |
3109 | --wordsLeft; | |
3110 | } | |
1c79356b | 3111 | |
0b4e3aa0 A |
3112 | FoundUnused: |
3113 | // Make sure the unused bit is early enough to use | |
3114 | if (currentBlock >= stopBlock) | |
1c79356b | 3115 | { |
0b4e3aa0 | 3116 | break; |
1c79356b | 3117 | } |
1c79356b | 3118 | |
0b4e3aa0 A |
3119 | // Remember the start of the extent |
3120 | firstBlock = currentBlock; | |
1c79356b | 3121 | |
1c79356b | 3122 | |
39236c6e A |
3123 | /* |
3124 | * Inner while loop 2: | |
3125 | * We get here if we find a free block. Count the number | |
3126 | * of contiguous free blocks observed. | |
3127 | * | |
3128 | * Initialization starts with checking the initial partial word | |
3129 | * if applicable. | |
3130 | */ | |
3131 | bitMask = currentBlock & kBitsWithinWordMask; | |
3132 | if (bitMask) | |
1c79356b | 3133 | { |
9bccf70c | 3134 | tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once |
0b4e3aa0 A |
3135 | bitMask = kHighBitInWordMask >> bitMask; |
3136 | while (bitMask && !(tempWord & bitMask)) | |
1c79356b | 3137 | { |
0b4e3aa0 A |
3138 | bitMask >>= 1; |
3139 | ++currentBlock; | |
1c79356b | 3140 | } |
0b4e3aa0 A |
3141 | |
3142 | // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)? | |
3143 | if (bitMask) | |
3144 | goto FoundUsed; | |
3145 | ||
3146 | // Didn't find any used bits, so we're done with this word. | |
3147 | ++currentWord; | |
3148 | --wordsLeft; | |
3149 | } | |
39236c6e | 3150 | |
0b4e3aa0 A |
3151 | // |
3152 | // Check whole words | |
3153 | // | |
3154 | while (currentBlock < endingBlock) | |
3155 | { | |
3156 | // See if it's time to read another block. | |
3157 | if (wordsLeft == 0) | |
1c79356b | 3158 | { |
0b4e3aa0 A |
3159 | buffer = NULL; |
3160 | err = ReleaseBitmapBlock(vcb, blockRef, false); | |
3161 | if (err != noErr) goto ErrorExit; | |
3162 | ||
55e303ae A |
3163 | /* |
3164 | * Skip over metadata blocks. | |
3165 | */ | |
3166 | if (!useMetaZone) { | |
2d21ac55 | 3167 | u_int32_t nextBlock; |
55e303ae A |
3168 | |
3169 | nextBlock = NextBitmapBlock(vcb, currentBlock); | |
3170 | if (nextBlock != currentBlock) { | |
4a249263 | 3171 | goto LoopExit; /* allocation gap, so stop */ |
55e303ae A |
3172 | } |
3173 | } | |
3174 | ||
0b4e3aa0 A |
3175 | err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); |
3176 | if ( err != noErr ) goto ErrorExit; | |
39236c6e | 3177 | |
0b4e3aa0 A |
3178 | currentWord = buffer; |
3179 | wordsLeft = wordsPerBlock; | |
1c79356b | 3180 | } |
39236c6e | 3181 | |
0b4e3aa0 | 3182 | // See if any of the bits are set |
9bccf70c | 3183 | if ((tempWord = SWAP_BE32(*currentWord)) != 0) |
1c79356b | 3184 | { |
0b4e3aa0 | 3185 | // Figure out which bit is set |
1c79356b | 3186 | bitMask = kHighBitInWordMask; |
0b4e3aa0 | 3187 | while (!(tempWord & bitMask)) |
1c79356b | 3188 | { |
0b4e3aa0 A |
3189 | bitMask >>= 1; |
3190 | ++currentBlock; | |
1c79356b | 3191 | } |
39236c6e | 3192 | |
0b4e3aa0 | 3193 | break; // Found the used bit; break out to FoundUsed. |
1c79356b | 3194 | } |
0b4e3aa0 A |
3195 | |
3196 | // Keep looking at the next word | |
3197 | currentBlock += kBitsPerWord; | |
3198 | ++currentWord; | |
3199 | --wordsLeft; | |
39236c6e | 3200 | |
0b4e3aa0 A |
3201 | // If we found at least maxBlocks, we can quit early. |
3202 | if ((currentBlock - firstBlock) >= maxBlocks) | |
3203 | break; | |
1c79356b | 3204 | } |
1c79356b | 3205 | |
0b4e3aa0 A |
3206 | FoundUsed: |
3207 | // Make sure we didn't run out of bitmap looking for a used block. | |
3208 | // If so, pin to the end of the bitmap. | |
3209 | if (currentBlock > endingBlock) | |
3210 | currentBlock = endingBlock; | |
3211 | ||
3212 | // Figure out how many contiguous free blocks there were. | |
3213 | // Pin the answer to maxBlocks. | |
3214 | foundBlocks = currentBlock - firstBlock; | |
3215 | if (foundBlocks > maxBlocks) | |
3216 | foundBlocks = maxBlocks; | |
3217 | if (foundBlocks >= minBlocks) | |
3218 | break; // Found what we needed! | |
3219 | ||
39236c6e A |
3220 | /* |
3221 | * We did not find the total blocks were were looking for, but | |
3222 | * add this free block run to our free extent cache list, if possible. | |
6d2010ae | 3223 | */ |
39236c6e A |
3224 | if (hfsmp->jnl == NULL) { |
3225 | /* If there is no journal, go ahead and add to the free ext cache. */ | |
3226 | updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); | |
3227 | } | |
3228 | else { | |
3229 | /* | |
3230 | * If journaled, only add to the free extent cache if this block is not | |
3231 | * waiting for a TRIM to complete; that implies that the transaction that freed it | |
3232 | * has not yet been committed to stable storage. | |
3233 | */ | |
3234 | int recently_deleted = 0; | |
3235 | uint32_t nextblock; | |
3236 | err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, | |
3237 | (uint64_t)foundBlocks, &recently_deleted, &nextblock); | |
3238 | if ((err) || (recently_deleted == 0)) { | |
3239 | /* if we hit an error, or the blocks not recently freed, go ahead and insert it */ | |
3240 | updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); | |
3241 | } | |
3242 | err = 0; | |
3243 | } | |
b0d623f7 | 3244 | |
0b4e3aa0 | 3245 | } while (currentBlock < stopBlock); |
4a249263 | 3246 | LoopExit: |
0b4e3aa0 A |
3247 | |
3248 | // Return the outputs. | |
3249 | if (foundBlocks < minBlocks) | |
3250 | { | |
3251 | DiskFull: | |
3252 | err = dskFulErr; | |
3253 | ErrorExit: | |
3254 | *actualStartBlock = 0; | |
3255 | *actualNumBlocks = 0; | |
3256 | } | |
3257 | else | |
3258 | { | |
3259 | err = noErr; | |
3260 | *actualStartBlock = firstBlock; | |
3261 | *actualNumBlocks = foundBlocks; | |
2d21ac55 A |
3262 | /* |
3263 | * Sanity check for overflow | |
3264 | */ | |
3265 | if ((firstBlock + foundBlocks) > vcb->allocLimit) { | |
b0d623f7 | 3266 | panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x", |
39236c6e A |
3267 | vcb->vcbVN, startingBlock, endingBlock, currentBlock, |
3268 | firstBlock, stopBlock, minBlocks, foundBlocks); | |
2d21ac55 | 3269 | } |
0b4e3aa0 | 3270 | } |
39236c6e | 3271 | |
6d2010ae | 3272 | if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) { |
b0d623f7 A |
3273 | int i; |
3274 | u_int32_t min_start = vcb->totalBlocks; | |
39236c6e | 3275 | |
b0d623f7 A |
3276 | // set the nextAllocation pointer to the smallest free block number |
3277 | // we've seen so on the next mount we won't rescan unnecessarily | |
6d2010ae | 3278 | lck_spin_lock(&vcb->vcbFreeExtLock); |
b0d623f7 A |
3279 | for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) { |
3280 | if (vcb->vcbFreeExt[i].startBlock < min_start) { | |
3281 | min_start = vcb->vcbFreeExt[i].startBlock; | |
3282 | } | |
3283 | } | |
6d2010ae | 3284 | lck_spin_unlock(&vcb->vcbFreeExtLock); |
b0d623f7 A |
3285 | if (min_start != vcb->totalBlocks) { |
3286 | if (min_start < vcb->nextAllocation) { | |
3287 | vcb->nextAllocation = min_start; | |
3288 | } | |
3289 | if (min_start < vcb->sparseAllocation) { | |
3290 | vcb->sparseAllocation = min_start; | |
3291 | } | |
3292 | } | |
3293 | } | |
39236c6e | 3294 | |
0b4e3aa0 A |
3295 | if (buffer) |
3296 | (void) ReleaseBitmapBlock(vcb, blockRef, false); | |
1c79356b | 3297 | |
6d2010ae A |
3298 | if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) |
3299 | KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); | |
b0d623f7 | 3300 | |
1c79356b A |
3301 | return err; |
3302 | } | |
3303 | ||
6d2010ae | 3304 | |
6d2010ae A |
3305 | /* |
3306 | * Count number of bits set in the given 32-bit unsigned number | |
3307 | * | |
3308 | * Returns: | |
3309 | * Number of bits set | |
3310 | */ | |
3311 | static int num_bits_set(u_int32_t num) | |
3312 | { | |
3313 | int count; | |
3314 | ||
3315 | for (count = 0; num; count++) { | |
3316 | num &= num - 1; | |
3317 | } | |
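/*
 * Each pass of the loop above clears the lowest set bit (num &= num - 1),
 * so it iterates once per set bit.  For example, a hypothetical num of
 * 0xB0 goes 0xB0 -> 0xA0 -> 0x80 -> 0, giving count = 3.
 */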
3318 | ||
3319 | return count; | |
3320 | } | |
3321 | ||
3322 | /* | |
3323 | * For a given range of blocks, find the total number of blocks | |
3324 | * allocated. If 'stop_on_first' is true, it stops as soon as it | |
3325 | * encounters the first allocated block. This option is useful | |
3326 | * to determine if any block is allocated or not. | |
3327 | * | |
3328 | * Inputs: | |
3329 | * startingBlock First allocation block number of the range to be scanned. | |
3330 | * numBlocks Total number of blocks that need to be scanned. | |
3331 | * stop_on_first Stop the search after the first allocated block is found. | |
3332 | * | |
3333 | * Output: | |
3334 | * allocCount Total number of allocation blocks allocated in the given range. | |
3335 | * | |
3336 | * On error, it is the number of allocated blocks found | |
3337 | * before the function got an error. | |
3338 | * | |
3339 | * If 'stop_on_first' is set, | |
3340 | * allocCount = 1 if any allocated block was found. | |
3341 | * allocCount = 0 if no allocated block was found. | |
3342 | * | |
3343 | * Returns: | |
3344 | * 0 on success, non-zero on failure. | |
3345 | */ | |
3346 | static int | |
3347 | hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, | |
3348 | u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount) | |
3349 | { | |
3350 | u_int32_t *currentWord; // Pointer to current word within bitmap block | |
3351 | u_int32_t wordsLeft; // Number of words left in this bitmap block | |
3352 | u_int32_t bitMask; // Word with given bits already set (ready to test) | |
3353 | u_int32_t firstBit; // Bit index within word of first bit to allocate | |
3354 | u_int32_t numBits; // Number of bits in word to allocate | |
3355 | u_int32_t *buffer = NULL; | |
3356 | uintptr_t blockRef; | |
3357 | u_int32_t bitsPerBlock; | |
3358 | u_int32_t wordsPerBlock; | |
39236c6e | 3359 | u_int32_t blockCount = 0; |
6d2010ae A |
3360 | int error; |
3361 | ||
3362 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) | |
3363 | KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0); | |
3364 | ||
3365 | /* | |
3366 | * Pre-read the bitmap block containing the first word of allocation | |
3367 | */ | |
3368 | error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); | |
3369 | if (error) | |
3370 | goto JustReturn; | |
3371 | ||
3372 | /* | |
3373 | * Initialize currentWord, and wordsLeft. | |
3374 | */ | |
3375 | { | |
3376 | u_int32_t wordIndexInBlock; | |
39236c6e | 3377 | |
6d2010ae A |
3378 | bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; |
3379 | wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord; | |
3380 | ||
3381 | wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; | |
3382 | currentWord = buffer + wordIndexInBlock; | |
3383 | wordsLeft = wordsPerBlock - wordIndexInBlock; | |
3384 | } | |
39236c6e | 3385 | |
6d2010ae A |
3386 | /* |
3387 | * First test any non word aligned bits. | |
3388 | */ | |
3389 | firstBit = startingBlock % kBitsPerWord; | |
3390 | if (firstBit != 0) { | |
3391 | bitMask = kAllBitsSetInWord >> firstBit; | |
3392 | numBits = kBitsPerWord - firstBit; | |
3393 | if (numBits > numBlocks) { | |
3394 | numBits = numBlocks; | |
3395 | bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); | |
3396 | } | |
3397 | if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { | |
3398 | if (stop_on_first) { | |
3399 | blockCount = 1; | |
3400 | goto Exit; | |
3401 | } | |
3402 | blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask)); | |
3403 | } | |
3404 | numBlocks -= numBits; | |
3405 | ++currentWord; | |
3406 | --wordsLeft; | |
91447636 A |
3407 | } |
3408 | ||
3409 | /* | |
3410 | * Test whole words (32 blocks) at a time. | |
3411 | */ | |
3412 | while (numBlocks >= kBitsPerWord) { | |
3413 | if (wordsLeft == 0) { | |
3414 | /* Read in the next bitmap block. */ | |
3415 | startingBlock += bitsPerBlock; | |
39236c6e | 3416 | |
91447636 A |
3417 | buffer = NULL; |
3418 | error = ReleaseBitmapBlock(hfsmp, blockRef, false); | |
3419 | if (error) goto Exit; | |
3420 | ||
3421 | error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); | |
3422 | if (error) goto Exit; | |
3423 | ||
3424 | /* Readjust currentWord and wordsLeft. */ | |
3425 | currentWord = buffer; | |
3426 | wordsLeft = wordsPerBlock; | |
3427 | } | |
3428 | if (*currentWord != 0) { | |
6d2010ae A |
3429 | if (stop_on_first) { |
3430 | blockCount = 1; | |
3431 | goto Exit; | |
3432 | } | |
3433 | blockCount += num_bits_set(*currentWord); | |
91447636 A |
3434 | } |
3435 | numBlocks -= kBitsPerWord; | |
3436 | ++currentWord; | |
3437 | --wordsLeft; | |
3438 | } | |
39236c6e | 3439 | |
91447636 A |
3440 | /* |
3441 | * Test any remaining blocks. | |
3442 | */ | |
3443 | if (numBlocks != 0) { | |
3444 | bitMask = ~(kAllBitsSetInWord >> numBlocks); | |
3445 | if (wordsLeft == 0) { | |
3446 | /* Read in the next bitmap block */ | |
3447 | startingBlock += bitsPerBlock; | |
39236c6e | 3448 | |
91447636 A |
3449 | buffer = NULL; |
3450 | error = ReleaseBitmapBlock(hfsmp, blockRef, false); | |
3451 | if (error) goto Exit; | |
3452 | ||
3453 | error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); | |
3454 | if (error) goto Exit; | |
3455 | ||
3456 | currentWord = buffer; | |
3457 | wordsLeft = wordsPerBlock; | |
3458 | } | |
3459 | if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { | |
6d2010ae A |
3460 | if (stop_on_first) { |
3461 | blockCount = 1; | |
3462 | goto Exit; | |
3463 | } | |
3464 | blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask)); | |
91447636 A |
3465 | } |
3466 | } | |
3467 | Exit: | |
3468 | if (buffer) { | |
3469 | (void)ReleaseBitmapBlock(hfsmp, blockRef, false); | |
3470 | } | |
6d2010ae A |
3471 | if (allocCount) { |
3472 | *allocCount = blockCount; | |
3473 | } | |
3474 | ||
3475 | JustReturn: | |
3476 | if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) | |
3477 | KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0); | |
3478 | ||
3479 | return (error); | |
3480 | } | |
3481 | ||
3482 | /* | |
3483 | * Count total number of blocks that are allocated in the given | |
3484 | * range from the bitmap. This is used to preflight total blocks | |
3485 | * that need to be relocated during volume resize. | |
3486 | * | |
3487 | * The journal or allocation file lock must be held. | |
3488 | * | |
3489 | * Returns: | |
3490 | * 0 on success, non-zero on failure. | |
3491 | * On failure, allocCount is zero. | |
3492 | */ | |
39236c6e | 3493 | int |
6d2010ae A |
3494 | hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, |
3495 | u_int32_t numBlocks, u_int32_t *allocCount) | |
3496 | { | |
3497 | return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount); | |
3498 | } | |
3499 | ||
3500 | /* | |
3501 | * Test to see if any blocks in a range are allocated. | |
3502 | * | |
3503 | * Note: On error, this function returns 1, which means that | |
3504 | * one or more blocks in the range are allocated. This function | |
3505 | * is primarily used for volume resize and we do not want | |
3506 | * to report to the caller that the blocks are free when we | |
3507 | * were not able to deterministically find it out. So on error, | |
3508 | * we always report that the blocks are allocated. | |
3509 | * | |
3510 | * The journal or allocation file lock must be held. | |
3511 | * | |
3512 | * Returns | |
3513 | * 0 if all blocks in the range are free. | |
3514 | * 1 if blocks in the range are allocated, or there was an error. | |
3515 | */ | |
39236c6e | 3516 | int |
6d2010ae A |
3517 | hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) |
3518 | { | |
3519 | int error; | |
3520 | u_int32_t allocCount; | |
3521 | ||
3522 | error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount); | |
3523 | if (error) { | |
3524 | /* On error, we always say that the blocks are allocated | |
3525 | * so that volume resize does not return false success. | |
3526 | */ | |
3527 | return 1; | |
3528 | } else { | |
3529 | /* The function was deterministically able to find out | |
3530 | * if there was any block allocated or not. In that case, | |
3531 | * the value in allocCount is good enough to be returned | |
3532 | * to the caller. | |
3533 | */ | |
3534 | return allocCount; | |
3535 | } | |
3536 | } | |
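#if 0
/*
 * Illustrative sketch only (not part of the original sources): a
 * resize-style preflight built on the two wrappers above.  The function
 * name and calling context are hypothetical; it simply shows the intended
 * division of labor between hfs_isallocated() (cheap yes/no answer) and
 * hfs_count_allocated() (full count for sizing relocation work).
 */
static int
example_preflight_range(struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, u_int32_t *blocks_to_move)
{
	*blocks_to_move = 0;

	/* Fast path: nothing allocated in the candidate range. */
	if (hfs_isallocated(hfsmp, startBlock, numBlocks) == 0)
		return 0;

	/* Otherwise count exactly how many blocks would need to move. */
	return hfs_count_allocated(hfsmp, startBlock, numBlocks, blocks_to_move);
}
#endif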
3537 | ||
3538 | /* | |
39236c6e | 3539 | * CONFIG_HFS_RBTREE |
6d2010ae A |
3540 | * Check to see if the red-black tree is live. Allocation file lock must be held |
3541 | * shared or exclusive to call this function. Note that we may call this even if | |
3542 | * HFS is built without activating the red-black tree code. | |
3543 | */ | |
3544 | __private_extern__ | |
3545 | int | |
3546 | hfs_isrbtree_active(struct hfsmount *hfsmp){ | |
39236c6e A |
3547 | |
3548 | #pragma unused (hfsmp) | |
3549 | ||
3550 | /* Just return 0 for now */ | |
3551 | return 0; | |
3552 | } | |
3553 | ||
3554 | ||
3555 | ||
3556 | /* Summary Table Functions */ | |
3557 | /* | |
3558 | * hfs_check_summary: | |
3559 | * | |
3560 | * This function should be used to query the summary table to see if we can | |
3561 | * bypass a bitmap block or not when we're trying to find a free allocation block. | |
3562 | * | |
3563 | * | |
3564 | * Inputs: | |
3565 | * allocblock - allocation block number. Will be used to infer the correct summary bit. | |
3566 | * hfsmp -- filesystem in question. | |
3567 | * | |
3568 | * Output Arg: | |
3569 | * *freeblocks - set to 1 if we believe there is at least one free block in this vcbVBMIOSize | |
3570 | * page of the bitmap file. | |
3571 | * | |
3572 | * | |
3573 | * Returns: | |
3574 | * 0 on success | |
3575 | * EINVAL on error | |
3576 | * | |
3577 | */ | |
3578 | ||
3579 | static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) { | |
3580 | ||
3581 | int err = EINVAL; | |
3582 | if (hfsmp->vcbVBMIOSize) { | |
3583 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
3584 | uint32_t index; | |
3585 | if (hfs_get_summary_index (hfsmp, allocblock, &index)) { | |
3586 | *freeblocks = 0; | |
3587 | return EINVAL; | |
3588 | } | |
3589 | ||
3590 | /* Ok, now that we have the bit index into the array, what byte is it in ? */ | |
3591 | uint32_t byteindex = index / kBitsPerByte; | |
3592 | uint8_t current_byte = hfsmp->hfs_summary_table[byteindex]; | |
3593 | uint8_t bit_in_byte = index % kBitsPerByte; | |
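/*
 * For instance (hypothetical index): a summary bit index of 37 yields
 * byteindex 4 and bit_in_byte 5, so the test below examines bit 5 of
 * hfs_summary_table[4].
 */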
3594 | ||
3595 | if (current_byte & (1 << bit_in_byte)) { | |
3596 | /* | |
3597 | * We do not believe there is anything free in the | |
3598 | * entire vcbVBMIOSize'd block. | |
3599 | */ | |
3600 | *freeblocks = 0; | |
3601 | } | |
3602 | else { | |
3603 | /* Looks like there might be a free block here... */ | |
3604 | *freeblocks = 1; | |
3605 | } | |
3606 | } | |
3607 | err = 0; | |
6d2010ae | 3608 | } |
39236c6e A |
3609 | |
3610 | return err; | |
3611 | } | |
3612 | ||
3613 | ||
3614 | #if 0 | |
3615 | /* | |
3616 | * hfs_get_next_summary | |
3617 | * | |
3618 | * From a given allocation block, jump to the allocation block at the start of the | |
3619 | * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over | |
3620 | * large swaths of bitmap once we have determined that the bitmap is relatively full. | |
3621 | * | |
3622 | * Inputs: hfsmount, starting allocation block number | |
3623 | * Output Arg: *newblock will contain the allocation block number to start | |
3624 | * querying. | |
3625 | * | |
3626 | * Returns: | |
3627 | * 0 on success | |
3628 | * EINVAL if the block argument is too large to be used, or the summary table not live. | |
3629 | * EFBIG if there are no more summary bits to be queried | |
3630 | */ | |
3631 | static int | |
3632 | hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) { | |
3633 | ||
3634 | u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; | |
3635 | u_int32_t start_offset; | |
3636 | u_int32_t next_offset; | |
3637 | int err = EINVAL; | |
3638 | ||
3639 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
3640 | if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) { | |
3641 | return err; | |
3642 | } | |
3643 | ||
3644 | next_offset = ++start_offset; /* advance to the start of the next summary region */ | |
3645 | ||
3646 | if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) { | |
3647 | /* Can't jump to the next summary bit. */ | |
3648 | return EINVAL; | |
3649 | } | |
3650 | ||
3651 | /* Otherwise, compute and return */ | |
3652 | *newblock = next_offset * bits_per_iosize; | |
3653 | if (*newblock >= hfsmp->totalBlocks) { | |
3654 | return EINVAL; | |
6d2010ae | 3655 | } |
39236c6e | 3656 | err = 0; |
6d2010ae | 3657 | } |
39236c6e A |
3658 | |
3659 | return err; | |
3660 | } | |
3661 | ||
6d2010ae | 3662 | #endif |
39236c6e A |
3663 | |
3664 | /* | |
3665 | * hfs_release_summary | |
3666 | * | |
3667 | * Given an extent that is about to be de-allocated on-disk, determine the number | |
3668 | * of summary bitmap bits that need to be marked as 'potentially available'. | |
3669 | * Then go ahead and mark them as free. | |
3670 | * | |
3671 | * Inputs: | |
3672 | * hfsmp - hfs mount | |
3673 | * block - starting allocation block. | |
3674 | * length - length of the extent. | |
3675 | * | |
3676 | * Returns: | |
3677 | * EINVAL upon any errors. | |
3678 | */ | |
3679 | static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) { | |
3680 | int err = EINVAL; | |
3681 | uint32_t end_blk = (start_blk + length) - 1; | |
3682 | ||
3683 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
3684 | /* Figure out what the starting / ending block's summary bits are */ | |
3685 | uint32_t start_bit; | |
3686 | uint32_t end_bit; | |
3687 | uint32_t current_bit; | |
3688 | ||
3689 | err = hfs_get_summary_index (hfsmp, start_blk, &start_bit); | |
3690 | if (err) { | |
3691 | goto release_err; | |
3692 | } | |
3693 | err = hfs_get_summary_index (hfsmp, end_blk, &end_bit); | |
3694 | if (err) { | |
3695 | goto release_err; | |
3696 | } | |
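/*
 * Example with hypothetical numbers: with vcbVBMIOSize = 4096 (32768
 * allocation blocks per summary bit), freeing start_blk = 98304,
 * length = 65536 gives start_bit = 3 and end_bit = 4, so the loop
 * below clears summary bits 3 and 4.
 */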
3697 | ||
3698 | if (ALLOC_DEBUG) { | |
3699 | if (start_bit > end_bit) { | |
3700 | panic ("HFS: start > end!, %d %d ", start_bit, end_bit); | |
3701 | } | |
3702 | } | |
3703 | current_bit = start_bit; | |
3704 | while (current_bit <= end_bit) { | |
3705 | err = hfs_set_summary (hfsmp, current_bit, 0); | |
3706 | current_bit++; | |
3707 | } | |
3708 | } | |
3709 | ||
3710 | release_err: | |
3711 | return err; | |
3712 | } | |
3713 | ||
3714 | /* | |
3715 | * hfs_find_summary_free | |
3716 | * | |
3717 | * Given an allocation block as input, returns an allocation block number as output as a | |
3718 | * suggestion for where to start scanning the bitmap in order to find free blocks. It will | |
3719 | * determine the vcbVBMIOSize page of the input allocation block, convert that into a summary | |
3720 | * bit, then keep iterating over the summary bits in order to find the first free one. | |
3721 | * | |
3722 | * Inputs: | |
3723 | * hfsmp - hfs mount | |
3724 | * block - starting allocation block | |
3725 | * newblock - output block as suggestion | |
3726 | * | |
3727 | * Returns: | |
3728 | * 0 on success | |
3729 | * ENOSPC if we could not find a free block | |
3730 | */ | |
3731 | ||
3732 | int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) { | |
3733 | ||
3734 | int err = ENOSPC; | |
3735 | uint32_t bit_index = 0; | |
3736 | uint32_t maybe_has_blocks = 0; | |
3737 | ||
3738 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
3739 | uint32_t byte_index; | |
3740 | uint8_t curbyte; | |
3741 | uint8_t bit_in_byte; | |
3742 | uint32_t summary_cap; | |
3743 | ||
3744 | /* | |
3745 | * We generate a cap for the summary search because the summary table | |
3746 | * always represents a full summary of the bitmap FILE, which may | |
3747 | * be way more bits than are necessary for the actual filesystem | |
3748 | * whose allocations are mapped by the bitmap. | |
3749 | * | |
3750 | * Compute how much of hfs_summary_size is useable for the given number | |
3751 | * of allocation blocks eligible on this FS. | |
3752 | */ | |
3753 | err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap); | |
3754 | if (err) { | |
3755 | goto summary_exit; | |
3756 | } | |
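/*
 * As an illustration (made-up numbers): with vcbVBMIOSize = 4096, each
 * summary bit covers 32768 allocation blocks, so a summary_cap of 3
 * would be computed for an allocLimit of 100000, and the scan below
 * never walks past bit 3.
 */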
3757 | ||
3758 | /* Check the starting block first */ | |
3759 | err = hfs_check_summary (hfsmp, block, &maybe_has_blocks); | |
3760 | if (err) { | |
3761 | goto summary_exit; | |
3762 | } | |
3763 | ||
3764 | if (maybe_has_blocks) { | |
3765 | /* | |
3766 | * It looks like the initial start block could have something. | |
3767 | * Short-circuit and just use that. | |
3768 | */ | |
3769 | *newblock = block; | |
3770 | goto summary_exit; | |
3771 | } | |
3772 | ||
3773 | /* | |
3774 | * OK, now we know that the first block was useless. | |
3775 | * Get the starting summary bit, and find it in the array | |
3776 | */ | |
3777 | maybe_has_blocks = 0; | |
3778 | err = hfs_get_summary_index (hfsmp, block, &bit_index); | |
3779 | if (err) { | |
3780 | goto summary_exit; | |
3781 | } | |
3782 | ||
3783 | /* Iterate until we find something. */ | |
3784 | while (bit_index <= summary_cap) { | |
3785 | byte_index = bit_index / kBitsPerByte; | |
3786 | curbyte = hfsmp->hfs_summary_table[byte_index]; | |
3787 | bit_in_byte = bit_index % kBitsPerByte; | |
3788 | ||
3789 | if (curbyte & (1 << bit_in_byte)) { | |
3790 | /* nothing here. increment and move on */ | |
3791 | bit_index++; | |
3792 | } | |
3793 | else { | |
3794 | /* | |
3795 | * found something! convert bit_index back into | |
3796 | * an allocation block for use. 'newblock' will now | |
3797 | * contain the proper allocation block # based on the bit | |
3798 | * index. | |
3799 | */ | |
3800 | err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock); | |
3801 | if (err) { | |
3802 | goto summary_exit; | |
3803 | } | |
3804 | maybe_has_blocks = 1; | |
3805 | break; | |
3806 | } | |
3807 | } | |
3808 | ||
3809 | /* If our loop didn't find anything, set err to ENOSPC */ | |
3810 | if (maybe_has_blocks == 0) { | |
3811 | err = ENOSPC; | |
3812 | } | |
3813 | } | |
3814 | ||
3815 | /* If the summary table is not active for this mount, we'll just return ENOSPC */ | |
3816 | summary_exit: | |
3817 | if (maybe_has_blocks) { | |
3818 | err = 0; | |
3819 | } | |
3820 | ||
3821 | return err; | |
3822 | } | |
3823 | ||
3824 | /* | |
3825 | * hfs_get_summary_allocblock | |
3826 | * | |
3827 | * Convert a summary bit into an allocation block number to use to start searching for free blocks. | |
3828 | * | |
3829 | * Inputs: | |
3830 | * hfsmp - hfs mount | |
3831 | * summarybit - summary bit index | |
3832 | * *alloc - allocation block number in the bitmap file. | |
3833 | * | |
3834 | * Returns: | |
3835 | * 0 on success | |
3836 | * EINVAL on failure | |
3837 | */ | |
3838 | int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t | |
3839 | summarybit, uint32_t *alloc) { | |
3840 | uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; | |
3841 | uint32_t allocblk; | |
3842 | ||
3843 | allocblk = summarybit * bits_per_iosize; | |
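/*
 * E.g. (hypothetical values): with vcbVBMIOSize = 4096, summary bit 3
 * maps back to allocation block 3 * 32768 = 98304, the inverse of the
 * division done in hfs_get_summary_index().
 */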
3844 | ||
3845 | if (allocblk >= hfsmp->totalBlocks) { | |
3846 | return EINVAL; | |
3847 | } | |
3848 | else { | |
3849 | *alloc = allocblk; | |
3850 | } | |
3851 | ||
3852 | return 0; | |
3853 | } | |
3854 | ||
3855 | ||
3856 | /* | |
3857 | * hfs_set_summary: | |
3858 | * | |
3859 | * This function should be used to manipulate the summary table | |
3860 | * | |
3861 | * The bit in question is set to one or zero depending on the value of | |
3862 | * the 'inuse' argument. | |
3863 | * | |
3864 | * Inputs: | |
3865 | * hfsmp - hfs mount | |
3866 | * summarybit - the bit index into the summary table to set/unset. | |
3867 | * inuse - the value to assign to the bit. | |
3868 | * | |
3869 | * Returns: | |
3870 | * 0 on success | |
3871 | * EINVAL on error | |
3872 | * | |
3873 | */ | |
3874 | ||
3875 | static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) { | |
3876 | ||
3877 | int err = EINVAL; | |
3878 | if (hfsmp->vcbVBMIOSize) { | |
3879 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
3880 | ||
3881 | if (ALLOC_DEBUG) { | |
3882 | if (hfsmp->hfs_summary_table == NULL) { | |
3883 | panic ("hfs_set_summary: no table for %p ", hfsmp); | |
3884 | } | |
3885 | } | |
3886 | ||
3887 | /* Ok, now that we have the bit index into the array, what byte is it in ? */ | |
3888 | uint32_t byte_index = summarybit / kBitsPerByte; | |
3889 | uint8_t current_byte = hfsmp->hfs_summary_table[byte_index]; | |
3890 | uint8_t bit_in_byte = summarybit % kBitsPerByte; | |
3891 | ||
3892 | if (inuse) { | |
3893 | current_byte = (current_byte | (1 << bit_in_byte)); | |
3894 | } | |
3895 | else { | |
3896 | current_byte = (current_byte & ~(1 << bit_in_byte)); | |
3897 | } | |
3898 | ||
3899 | hfsmp->hfs_summary_table[byte_index] = current_byte; | |
3900 | } | |
3901 | err = 0; | |
3902 | } | |
3903 | ||
3904 | return err; | |
3905 | } | |
3906 | ||
3907 | ||
3908 | /* | |
3909 | * hfs_get_summary_index: | |
3910 | * | |
3911 | * This is a helper function which determines which summary bit represents the vcbVBMIOSize worth | |
3912 | * of bitmap IO that covers the given allocation block. | |
3913 | * | |
3914 | * Returns: | |
3915 | * 0 on success | |
3916 | * EINVAL on failure | |
3917 | */ | |
3918 | static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) { | |
3919 | uint32_t summary_bit; | |
3920 | uint32_t bits_per_iosize; | |
3921 | int err = EINVAL; | |
3922 | ||
3923 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
3924 | /* Is the input block bigger than the total number of blocks? */ | |
3925 | if (block >= hfsmp->totalBlocks) { | |
3926 | return EINVAL; | |
3927 | } | |
3928 | ||
3929 | /* Is there even a vcbVBMIOSize set? */ | |
3930 | if (hfsmp->vcbVBMIOSize == 0) { | |
3931 | return EINVAL; | |
3932 | } | |
3933 | ||
3934 | bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; | |
3935 | ||
3936 | summary_bit = block / bits_per_iosize; | |
3937 | ||
3938 | *index = summary_bit; | |
3939 | err = 0; | |
3940 | } | |
3941 | ||
3942 | return err; | |
3943 | } | |
3944 | ||
3945 | /* | |
3946 | * hfs_init_summary | |
3947 | * | |
3948 | * From a given mount structure, compute how big the summary table should be for the given | |
3949 | * filesystem, then allocate and bzero the memory. | |
3950 | * | |
3951 | * Returns: | |
3952 | * 0 on success | |
3953 | * EINVAL on failure | |
3954 | */ | |
3955 | int | |
3956 | hfs_init_summary (struct hfsmount *hfsmp) { | |
3957 | ||
3958 | uint32_t summary_size; | |
3959 | uint32_t summary_size_bytes; | |
3960 | uint8_t *summary_table; | |
3961 | ||
3962 | if (hfsmp->hfs_allocation_cp == NULL) { | |
3963 | if (ALLOC_DEBUG) { | |
3964 | printf("hfs: summary table cannot progress without a bitmap cnode! \n"); | |
3965 | } | |
3966 | return EINVAL; | |
3967 | } | |
3968 | /* | |
3969 | * The practical maximum size of the summary table is 16KB: | |
3970 | * | |
3971 | * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte. | |
3972 | * | |
3973 | * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but | |
3974 | * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes | |
3975 | * supporting this much worse. The math would instead look like this: | |
3976 | * (512MB / 2k) / 8 == 32k. | |
3977 | * | |
3978 | * So, we will disallow the summary table if the allocation block size is < 4k. | |
3979 | */ | |
3980 | ||
3981 | if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) { | |
3982 | printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize); | |
3983 | return EINVAL; | |
3984 | } | |
3985 | ||
3986 | summary_size = hfsmp->hfs_allocation_cp->c_blocks; | |
3987 | ||
3988 | if (ALLOC_DEBUG) { | |
3989 | printf("HFS Summary Table Initialization: Bitmap %u blocks\n", | |
3990 | hfsmp->hfs_allocation_cp->c_blocks); | |
3991 | } | |
3992 | ||
3993 | /* | |
3994 | * If the bitmap IO size is not the same as the allocation block size, then | |
3995 | * re-compute the number of summary bits necessary. Note that above, the | |
3996 | * default size is the number of allocation blocks in the bitmap *FILE* | |
3997 | * (not the number of bits in the bitmap itself). If the allocation block size | |
3998 | * is large enough though, we may need to increase this. | |
3999 | */ | |
4000 | if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) { | |
4001 | uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize; | |
4002 | lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize; | |
4003 | ||
4004 | /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */ | |
4005 | summary_size = (uint32_t) lrg_size; | |
4006 | } | |
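/*
 * Worked example (hypothetical volume): a bitmap file of 131072
 * 4 KB allocation blocks (512 MB) read with a 32 KB vcbVBMIOSize gives
 * lrg_size = 536870912 / 32768 = 16384 summary bits.
 */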
4007 | ||
4008 | /* | |
4009 | * If the block size is the same as the IO Size, then the total number of blocks | |
4010 | * is already equal to the number of IO units, which is our number of summary bits. | |
4011 | */ | |
4012 | ||
4013 | summary_size_bytes = summary_size / kBitsPerByte; | |
4014 | /* Always add one byte, just in case we have a dangling number of bits */ | |
4015 | summary_size_bytes++; | |
4016 | ||
4017 | if (ALLOC_DEBUG) { | |
4018 | printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size); | |
4019 | printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes); | |
4020 | } | |
4021 | ||
4022 | /* Store the field in the mount point, and then MALLOC/bzero the memory */ | |
4023 | hfsmp->hfs_summary_size = summary_size; | |
4024 | hfsmp->hfs_summary_bytes = summary_size_bytes; | |
4025 | ||
4026 | MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK); | |
4027 | if (summary_table == NULL) { | |
4028 | return ENOMEM; | |
4029 | } | |
4030 | bzero (summary_table, summary_size_bytes); | |
4031 | ||
4032 | /* enable the summary table */ | |
4033 | hfsmp->hfs_flags |= HFS_SUMMARY_TABLE; | |
4034 | hfsmp->hfs_summary_table = summary_table; | |
4035 | ||
4036 | if (ALLOC_DEBUG) { | |
4037 | if (hfsmp->hfs_summary_table == NULL) { | |
4038 | panic ("HFS Summary Init: no table for %p\n", hfsmp); | |
4039 | } | |
4040 | } | |
4041 | return 0; | |
4042 | } | |
4043 | ||
4044 | /* | |
4045 | * hfs_rebuild_summary | |
4046 | * | |
4047 | * This function should be used to allocate a new hunk of memory for use as a summary | |
4048 | * table, then copy the existing data into it. We use it whenever the filesystem's size | |
4049 | * changes. When a resize is in progress, you can still use the extant summary | |
4050 | * table if it is active. | |
4051 | * | |
4052 | * Inputs: | |
4053 | * hfsmp -- FS in question | |
4054 | * newlength -- new length of the FS in allocation blocks. | |
4055 | * | |
4056 | * Outputs: | |
4057 | * 0 on success, EINVAL on failure. If this function fails, the summary table | |
4058 | * will be disabled for future use. | |
4059 | * | |
4060 | */ | |
4061 | static int hfs_rebuild_summary (struct hfsmount *hfsmp) { | |
4062 | ||
4063 | uint32_t new_summary_size; | |
4064 | ||
4065 | new_summary_size = hfsmp->hfs_allocation_cp->c_blocks; | |
4066 | ||
4067 | ||
4068 | if (ALLOC_DEBUG) { | |
4069 | printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size); | |
4070 | } | |
4071 | ||
4072 | /* | |
4073 | * If the bitmap IO size is not the same as the allocation block size, then re-compute | |
4074 | * the number of summary bits necessary. Note that above, the default size is the number | |
4075 | * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages). | |
4076 | * If the allocation block size is large enough though, we may need to increase this, as | |
4077 | * bitmap IO is capped at 64k per IO. | |
4078 | */ | |
4079 | if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) { | |
4080 | uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize; | |
4081 | lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize; | |
4082 | ||
4083 | /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */ | |
4084 | new_summary_size = (uint32_t) lrg_size; | |
4085 | } | |
4086 | ||
4087 | /* | |
4088 | * Ok, we have the new summary bitmap theoretical max size. See if it's the same as | |
4089 | * what we've got already... | |
4090 | */ | |
4091 | if (new_summary_size != hfsmp->hfs_summary_size) { | |
4092 | uint32_t summarybytes = new_summary_size / kBitsPerByte; | |
4093 | uint32_t copysize; | |
4094 | uint8_t *newtable; | |
4095 | /* Add one byte for slop */ | |
4096 | summarybytes++; | |
4097 | ||
4098 | if (ALLOC_DEBUG) { | |
4099 | printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size); | |
4100 | printf("HFS Summary Table Size (in bytes) %d \n", summarybytes); | |
4101 | } | |
4102 | ||
4103 | /* Attempt to MALLOC the memory */ | |
4104 | MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK); | |
4105 | if (newtable == NULL) { | |
4106 | /* | |
4107 | * ERROR! We need to disable the table now | |
4108 | */ | |
4109 | FREE (hfsmp->hfs_summary_table, M_TEMP); | |
4110 | hfsmp->hfs_summary_table = NULL; | |
4111 | hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; | |
4112 | return EINVAL; | |
4113 | } | |
4114 | bzero (newtable, summarybytes); | |
4115 | ||
4116 | /* | |
4117 | * The new table may be smaller than the old one. If this is true, then | |
4118 | * we can't copy the full size of the existing summary table into the new | |
4119 | * one. | |
4120 | * | |
4121 | * The converse is not an issue since we bzeroed the table above. | |
4122 | */ | |
4123 | copysize = hfsmp->hfs_summary_bytes; | |
4124 | if (summarybytes < hfsmp->hfs_summary_bytes) { | |
4125 | copysize = summarybytes; | |
4126 | } | |
4127 | memcpy (newtable, hfsmp->hfs_summary_table, copysize); | |
4128 | ||
4129 | /* We're all good. Destroy the old copy and update ptrs */ | |
4130 | FREE (hfsmp->hfs_summary_table, M_TEMP); | |
4131 | ||
4132 | hfsmp->hfs_summary_table = newtable; | |
4133 | hfsmp->hfs_summary_size = new_summary_size; | |
4134 | hfsmp->hfs_summary_bytes = summarybytes; | |
4135 | } | |
4136 | ||
4137 | return 0; | |
4138 | } | |
4139 | ||
4140 | ||
4141 | #if ALLOC_DEBUG | |
4142 | /* | |
4143 | * hfs_validate_summary | |
4144 | * | |
4145 | * Validation routine for the summary table. Debug-only function. | |
4146 | * | |
4147 | * Bitmap lock must be held. | |
4148 | * | |
4149 | */ | |
4150 | void hfs_validate_summary (struct hfsmount *hfsmp) { | |
4151 | uint32_t i; | |
4152 | int err; | |
4153 | ||
4154 | /* | |
4155 | * Iterate over all of the bits in the summary table, and verify that | |
4156 | * there really are free blocks in the pages that we believe may | |
4157 | * contain free blocks. | |
4158 | */ | |
4159 | ||
4160 | if (hfsmp->hfs_summary_table == NULL) { | |
4161 | panic ("HFS Summary: No HFS summary table!"); | |
4162 | } | |
4163 | ||
4164 | /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. We add 1 byte for slop. */ | |
4165 | if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) { | |
4166 | panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size); | |
4167 | } | |
4168 | ||
4169 | if (hfsmp->vcbVBMIOSize == 0) { | |
4170 | panic("HFS Summary: no VCB VBM IO Size !"); | |
4171 | } | |
4172 | ||
4173 | printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN); | |
4174 | printf("hfs: summary validation %d summary bits, %d total blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks); | |
4175 | ||
4176 | ||
4177 | /* iterate through all possible summary bits */ | |
4178 | for (i = 0; i < hfsmp->hfs_summary_size ; i++) { | |
4179 | ||
4180 | uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; | |
4181 | uint32_t byte_offset = hfsmp->vcbVBMIOSize * i; | |
4182 | ||
4183 | /* Compute the corresponding allocation block for the summary bit. */ | |
4184 | uint32_t alloc_block = i * bits_per_iosize; | |
4185 | ||
4186 | /* | |
4187 | * We use a uint32_t pointer here because it will speed up | |
4188 | * access to the real bitmap data on disk. | |
4189 | */ | |
4190 | uint32_t *block_data; | |
4191 | struct buf *bp; | |
4192 | int counter; | |
4193 | int counter_max; | |
4194 | int saw_free_bits = 0; | |
4195 | ||
4196 | /* Get the block */ | |
4197 | if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) { | |
4198 | panic ("HFS Summary: error (%d) in ReadBitmapRange!", err); | |
4199 | } | |
4200 | ||
4201 | /* Query the status of the bit and then make sure we match */ | |
4202 | uint32_t maybe_has_free_blocks; | |
4203 | err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks); | |
4204 | if (err) { | |
4205 | panic ("HFS Summary: hfs_check_summary returned error (%d) ", err); | |
4206 | } | |
4207 | counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord; | |
4208 | ||
4209 | for (counter = 0; counter < counter_max; counter++) { | |
4210 | uint32_t word = block_data[counter]; | |
4211 | ||
4212 | /* We assume that we'll not find any free bits here. */ | |
4213 | if (word != kAllBitsSetInWord) { | |
4214 | if (maybe_has_free_blocks) { | |
4215 | /* All done */ | |
4216 | saw_free_bits = 1; | |
4217 | break; | |
4218 | } | |
4219 | else { | |
4220 | panic ("HFS Summary: hfs_check_summary saw free bits!"); | |
4221 | } | |
4222 | } | |
4223 | } | |
4224 | ||
4225 | if (maybe_has_free_blocks && (saw_free_bits == 0)) { | |
4226 | panic ("HFS Summary: did not see free bits !"); | |
4227 | } | |
4228 | ||
4229 | /* Release the block. */ | |
4230 | if ((err = ReleaseScanBitmapRange (bp))) { | |
4231 | panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err); | |
4232 | } | |
4233 | } | |
4234 | ||
4235 | printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN); | |
4236 | ||
4237 | return; | |
4238 | } | |
4239 | #endif | |
4240 | ||
4241 | /* | |
4242 | * hfs_alloc_scan_range: | |
4243 | * | |
4244 | * This function should be used to scan large ranges of the allocation bitmap | |
4245 | * at one time. It makes two key assumptions: | |
4246 | * | |
4247 | * 1) Bitmap lock is held during the duration of the call (exclusive) | |
4248 | * 2) There are no pages in the buffer cache for any of the bitmap | |
4249 | * blocks that we may encounter. It *MUST* be completely empty. | |
4250 | * | |
4251 | * The expected use case is when we are scanning the bitmap in full while we are | |
4252 | * still mounting the filesystem in order to issue TRIMs or build up the summary | |
4253 | * table for the mount point. It should be done after any potential journal replays | |
4254 | * are completed and their I/Os fully issued. | |
4255 | * | |
4256 | * The key reason for assumption (2) above is that this function will try to issue | |
4257 | * I/O against the bitmap file in chunks as large as possible -- essentially as | |
4258 | * much as the buffer layer will handle (1MB). Because the size of these I/Os | |
4259 | * is larger than what would be expected during normal runtime we must invalidate | |
4260 | * the buffers as soon as we are done with them so that they do not persist in | |
4261 | * the buffer cache for other threads to find, as they'll typically be doing | |
4262 | * allocation-block size I/Os instead. | |
4263 | * | |
4264 | * Input Args: | |
4265 | * hfsmp - hfs mount data structure | |
4266 | * startbit - allocation block # to start our scan. It must be aligned | |
4267 | * on a vcbVBMIOsize boundary. | |
4268 | * list - journal trim list data structure for issuing TRIMs | |
4269 | * | |
4270 | * Output Args: | |
4271 | * bitToScan - Return the next bit to scan if this function is called again. | |
4272 | * Caller will supply this into the next invocation | |
4273 | * of this call as 'startbit'. | |
4274 | */ | |
4275 | ||
4276 | static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit, | |
4277 | u_int32_t *bitToScan, struct jnl_trim_list *list) { | |
4278 | ||
4279 | int error; | |
4280 | int readwrite = 1; | |
4281 | u_int32_t curAllocBlock; | |
4282 | struct buf *blockRef = NULL; | |
4283 | u_int32_t *buffer = NULL; | |
4284 | u_int32_t free_offset = 0; //tracks the start of the current free range | |
4285 | u_int32_t size = 0; // tracks the length of the current free range. | |
4286 | u_int32_t iosize = 0; //how much io we should generate against the bitmap | |
4287 | u_int32_t byte_off; // byte offset into the bitmap file. | |
4288 | u_int32_t completed_size; // how much io was actually completed | |
4289 | u_int32_t last_bitmap_block; | |
4290 | u_int32_t current_word; | |
4291 | u_int32_t word_index = 0; | |
4292 | ||
4293 | /* summary table building */ | |
4294 | uint32_t summary_bit = 0; | |
4295 | uint32_t saw_free_blocks = 0; | |
4296 | uint32_t last_marked = 0; | |
4297 | ||
4298 | if (hfsmp->hfs_flags & HFS_READ_ONLY) { | |
4299 | readwrite = 0; | |
4300 | } | |
4301 | ||
4302 | /* | |
4303 | * Compute how much I/O we should generate here. | |
4304 | * hfs_scan_range_size will validate that the start bit | |
4305 | * converted into a byte offset into the bitmap file, | |
4306 | * is aligned on a VBMIOSize boundary. | |
4307 | */ | |
4308 | error = hfs_scan_range_size (hfsmp, startbit, &iosize); | |
4309 | if (error) { | |
4310 | if (ALLOC_DEBUG) { | |
4311 | panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error); | |
4312 | } | |
4313 | return error; | |
4314 | } | |
4315 | ||
4316 | if (iosize < hfsmp->vcbVBMIOSize) { | |
4317 | if (ALLOC_DEBUG) { | |
4318 | panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize); | |
4319 | } | |
4320 | return EINVAL; | |
4321 | } | |
4322 | ||
4323 | /* hfs_scan_range_size should have verified startbit. Convert it to bytes */ | |
4324 | byte_off = startbit / kBitsPerByte; | |
4325 | ||
4326 | /* | |
4327 | * When the journal replays blocks, it does so by writing directly to the disk | |
4328 | * device (bypassing any filesystem vnodes and such). When it finishes its I/Os | |
4329 | * it also immediately re-reads and invalidates the range covered by the bp so | |
4330 | * it does not leave anything lingering in the cache (for iosize reasons). | |
4331 | * | |
4332 | * As such, it is safe to do large I/Os here with ReadBitmapRange. | |
4333 | * | |
4334 | * NOTE: It is not recommended, but it is possible to call the function below | |
4335 | * on sections of the bitmap that may be in core already as long as the pages are not | |
4336 | * dirty. In that case, we'd notice that something starting at that | |
4337 | * logical block of the bitmap exists in the metadata cache, and we'd check | |
4338 | * if the iosize requested is the same as what was already allocated for it. | |
4339 | * Odds are pretty good we're going to request something larger. In that case, | |
4340 | * we just free the existing memory associated with the buf and reallocate a | |
4341 | * larger range. This function should immediately invalidate it as soon as we're | |
4342 | * done scanning, so this shouldn't cause any coherency issues. | |
4343 | */ | |
4344 | ||
4345 | error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef); | |
4346 | if (error) { | |
4347 | if (ALLOC_DEBUG) { | |
4348 | panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error); | |
4349 | } | |
4350 | return error; | |
4351 | } | |
4352 | ||
4353 | /* | |
4354 | * At this point, we have a giant wired buffer that represents some portion of | |
4355 | * the bitmap file that we want to analyze. We may not have gotten all 'iosize' | |
4356 | * bytes though, so clip our ending bit to what we actually read in. | |
4357 | */ | |
4358 | completed_size = buf_count(blockRef); | |
4359 | last_bitmap_block = completed_size * kBitsPerByte; | |
4360 | last_bitmap_block = last_bitmap_block + startbit; | |
4361 | ||
4362 | /* Cap the last block to the total number of blocks if required */ | |
4363 | if (last_bitmap_block > hfsmp->totalBlocks) { | |
4364 | last_bitmap_block = hfsmp->totalBlocks; | |
4365 | } | |
4366 | ||
4367 | /* curAllocBlock represents the logical block we're analyzing. */ | |
4368 | curAllocBlock = startbit; | |
4369 | word_index = 0; | |
4370 | size = 0; | |
4371 | ||
4372 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
4373 | if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) { | |
4374 | error = EINVAL; | |
4375 | if (ALLOC_DEBUG) { | |
4376 | panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit); | |
4377 | } | |
4378 | return error; | |
4379 | } | |
4380 | /* | |
4381 | * summary_bit should now be set to the summary bit corresponding to | |
4382 | * the allocation block of the first bit that we're supposed to scan | |
4383 | */ | |
4384 | } | |
4385 | saw_free_blocks = 0; | |
4386 | ||
4387 | while (curAllocBlock < last_bitmap_block) { | |
4388 | u_int32_t bit; | |
4389 | ||
4390 | /* Update the summary table as needed */ | |
4391 | if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { | |
4392 | if (ALLOC_DEBUG) { | |
4393 | if (hfsmp->hfs_summary_table == NULL) { | |
4394 | panic ("hfs_alloc_scan_range: no summary table!"); | |
4395 | } | |
4396 | } | |
4397 | ||
4398 | uint32_t temp_summary; | |
4399 | error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary); | |
4400 | if (error) { | |
4401 | if (ALLOC_DEBUG) { | |
4402 | panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock); | |
4403 | } | |
4404 | return EINVAL; | |
4405 | } | |
4406 | ||
4407 | if (ALLOC_DEBUG) { | |
4408 | if (temp_summary < summary_bit) { | |
4409 | panic ("hfs_alloc_scan_range: backwards summary bit?\n"); | |
4410 | } | |
4411 | } | |
4412 | ||
4413 | /* | |
4414 | * If temp_summary is greater than summary_bit, then this | |
4415 | * means that the next allocation block crosses a vcbVBMIOSize boundary | |
4416 | * and we should treat this range of on-disk data as part of a new summary | |
4417 | * bit. | |
4418 | */ | |
4419 | if (temp_summary > summary_bit) { | |
4420 | if (saw_free_blocks == 0) { | |
4421 | /* Mark the bit as totally consumed in the summary table */ | |
4422 | hfs_set_summary (hfsmp, summary_bit, 1); | |
4423 | } | |
4424 | else { | |
4425 | /* Mark the bit as potentially free in summary table */ | |
4426 | hfs_set_summary (hfsmp, summary_bit, 0); | |
4427 | } | |
4428 | last_marked = summary_bit; | |
4429 | /* | |
4430 | * Any time we set the summary table, update our counter which tracks | |
4431 | * the last bit that was fully marked in the summary table. | |
4432 | * | |
4433 | * Then reset our marker which says we haven't seen a free bit yet. | |
4434 | */ | |
4435 | saw_free_blocks = 0; | |
4436 | summary_bit = temp_summary; | |
4437 | } | |
4438 | } /* End summary table conditions */ | |
4439 | ||
4440 | current_word = SWAP_BE32(buffer[word_index]); | |
4441 | /* Iterate through the word 1 bit at a time... */ | |
4442 | for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) { | |
4443 | if (curAllocBlock >= last_bitmap_block) { | |
4444 | break; | |
4445 | } | |
4446 | u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit)); | |
4447 | ||
4448 | if (allocated) { | |
4449 | if (size != 0) { | |
4450 | if (readwrite) { | |
4451 | /* Insert the previously tracked range of free blocks to the trim list */ | |
4452 | hfs_track_unmap_blocks (hfsmp, free_offset, size, list); | |
4453 | } | |
4454 | add_free_extent_cache (hfsmp, free_offset, size); | |
4455 | size = 0; | |
4456 | free_offset = 0; | |
4457 | } | |
4458 | } | |
4459 | else { | |
4460 | /* Not allocated */ | |
4461 | size++; | |
4462 | if (free_offset == 0) { | |
4463 | /* Start a new run of free space at curAllocBlock */ | |
4464 | free_offset = curAllocBlock; | |
4465 | } | |
4466 | if (saw_free_blocks == 0) { | |
4467 | saw_free_blocks = 1; | |
4468 | } | |
4469 | } | |
4470 | } /* end for loop iterating through the word */ | |
4471 | ||
4472 | if (curAllocBlock < last_bitmap_block) { | |
4473 | word_index++; | |
4474 | } | |
4475 | ||
4476 | } /* End while loop (iterates through last_bitmap_block) */ | |
4477 | ||
4478 | ||
4479 | /* | |
4480 | * We've (potentially) completed our pass through this region of bitmap, | |
4481 | * but one thing we may not have done is updated that last summary bit for | |
4482 | * the last page we scanned, because we would have never transitioned across | |
4483 | * a vcbVBMIOSize boundary again. Check for that and update the last bit | |
4484 | * as needed. | |
4485 | * | |
4486 | * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap | |
4487 | * for the region of bitmap on-disk that we were scanning. (it is one greater). | |
4488 | */ | |
4489 | if ((curAllocBlock >= last_bitmap_block) && | |
4490 | (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { | |
4491 | uint32_t temp_summary; | |
4492 | /* temp_block should be INSIDE the region we just scanned, so subtract 1 */ | |
4493 | uint32_t temp_block = last_bitmap_block - 1; | |
4494 | error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary); | |
4495 | if (error) { | |
4496 | if (ALLOC_DEBUG) { | |
4497 | panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block); | |
4498 | } | |
4499 | return EINVAL; | |
4500 | } | |
4501 | ||
4502 | /* Did we already update this in the table? */ | |
4503 | if (temp_summary > last_marked) { | |
4504 | if (saw_free_blocks == 0) { | |
4505 | hfs_set_summary (hfsmp, temp_summary, 1); | |
4506 | } | |
4507 | else { | |
4508 | hfs_set_summary (hfsmp, temp_summary, 0); | |
4509 | } | |
4510 | } | |
4511 | } | |
4512 | ||
4513 | /* | |
4514 | * We may have been tracking a range of free blocks that hasn't been inserted yet. | |
4515 | * Keep the logic for the TRIM and free extent separate from that of the summary | |
4516 | * table management even though they are closely linked. | |
4517 | */ | |
4518 | if (size != 0) { | |
4519 | if (readwrite) { | |
4520 | hfs_track_unmap_blocks (hfsmp, free_offset, size, list); | |
4521 | } | |
4522 | add_free_extent_cache (hfsmp, free_offset, size); | |
4523 | } | |
4524 | ||
4525 | /* | |
4526 | * curAllocBlock represents the next block we need to scan when we return | |
4527 | * to this function. | |
4528 | */ | |
4529 | *bitToScan = curAllocBlock; | |
4530 | ReleaseScanBitmapRange(blockRef); | |
4531 | ||
6d2010ae | 4532 | return 0; |
39236c6e | 4533 | |
6d2010ae A |
4534 | } |
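| /* | |
|  * Editor's sketch (not part of the original source): a full-bitmap scan | |
|  * would typically drive this function in a loop, feeding the returned | |
|  * 'bitToScan' back in as the next 'startbit', along the lines of: | |
|  * | |
|  *     u_int32_t start = 0; | |
|  *     while (start < hfsmp->totalBlocks) { | |
|  *         error = hfs_alloc_scan_range(hfsmp, start, &start, list); | |
|  *         if (error) | |
|  *             break; | |
|  *     } | |
|  * | |
|  * Per the header comment above, the exclusive bitmap lock must be held | |
|  * for the duration of the loop and the bitmap buffers must not be cached. | |
|  */ | |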
4535 | ||
316670eb | 4536 | |
39236c6e A |
4537 | |
4538 | /* | |
4539 | * Compute the maximum I/O size to generate against the bitmap file | |
4540 | * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap. | |
316670eb | 4541 | * |
39236c6e A |
4542 | * Inputs: |
4543 | * hfsmp -- hfsmount to look at | |
4544 | * bitmap_off -- bit offset into the bitmap file | |
4545 | * | |
4546 | * Outputs: | |
4547 | * iosize -- iosize to generate. | |
6d2010ae | 4548 | * |
316670eb | 4549 | * Returns: |
39236c6e | 4550 | * 0 on success; EINVAL otherwise |
6d2010ae | 4551 | */ |
39236c6e | 4552 | static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) { |
6d2010ae | 4553 | |
6d2010ae | 4554 | /* |
39236c6e A |
4555 | * The maximum bitmap size is 512MB regardless of ABN size, so we can get away |
4556 | * with 32 bit math in this function. | |
4557 | */ | |
4558 | ||
4559 | uint32_t bitmap_len; | |
4560 | uint32_t remaining_bitmap; | |
4561 | uint32_t target_iosize; | |
4562 | uint32_t bitmap_off; | |
4563 | ||
4564 | /* Is this bit index not word aligned? If so, immediately fail. */ | |
4565 | if (bitmap_st % kBitsPerWord) { | |
4566 | if (ALLOC_DEBUG) { | |
4567 | panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st); | |
4568 | } | |
4569 | return EINVAL; | |
6d2010ae | 4570 | } |
39236c6e A |
4571 | |
4572 | /* bitmap_off is in bytes, not allocation blocks/bits */ | |
4573 | bitmap_off = bitmap_st / kBitsPerByte; | |
4574 | ||
4575 | if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) { | |
4576 | if (ALLOC_DEBUG) { | |
4577 | panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off); | |
6d2010ae | 4578 | } |
39236c6e | 4579 | return EINVAL; |
6d2010ae | 4580 | } |
39236c6e A |
4581 | |
4582 | /* | |
4583 | * Also invalid if it's not at least aligned to HFS bitmap logical | |
4584 | * block boundaries. We don't have to emit an iosize that's an | |
4585 | * exact multiple of the VBMIOSize, but it must start on such | |
4586 | * a boundary. | |
4587 | * | |
4588 | * The vcbVBMIOSize may be SMALLER than the allocation block size | |
4589 | * on a FS with giant allocation blocks, but it will never be | |
4590 | * greater than it, so it should be safe to start I/O | |
4591 | * aligned on a VBMIOsize boundary. | |
4592 | */ | |
4593 | if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) { | |
4594 | if (ALLOC_DEBUG) { | |
4595 | panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off); | |
4596 | } | |
4597 | return EINVAL; | |
6d2010ae | 4598 | } |
39236c6e | 4599 | |
6d2010ae | 4600 | /* |
39236c6e A |
4601 | * Generate the total bitmap file length in bytes, then round up |
4602 | * that value to the end of the last allocation block, if needed (It | |
4603 | * will probably be needed). We won't scan past the last actual | |
4604 | * allocation block. | |
4605 | * | |
4606 | * Unless we're completing the bitmap scan (or bitmap < 1MB), we | |
4607 | * have to complete the I/O on VBMIOSize boundaries, but we can only read | |
4608 | * up until the end of the bitmap file. | |
6d2010ae | 4609 | */ |
39236c6e A |
4610 | bitmap_len = hfsmp->totalBlocks / kBitsPerByte; |
4611 | if (bitmap_len % (hfsmp->blockSize)) { | |
4612 | bitmap_len = (bitmap_len / hfsmp->blockSize); | |
4613 | /* round up to the end of the next alloc block */ | |
4614 | bitmap_len++; | |
4615 | ||
4616 | /* Convert the # of alloc blocks back to bytes. */ | |
4617 | bitmap_len = bitmap_len * hfsmp->blockSize; | |
4618 | } | |
4619 | ||
4620 | remaining_bitmap = bitmap_len - bitmap_off; | |
4621 | ||
4622 | /* | |
4623 | * io size is the MIN of the maximum I/O we can generate or the | |
4624 | * remaining amount of bitmap. | |
4625 | */ | |
4626 | target_iosize = MIN((MAXBSIZE), remaining_bitmap); | |
4627 | *iosize = target_iosize; | |
4628 | ||
6d2010ae A |
4629 | return 0; |
4630 | } | |
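| /* | |
|  * Editor's worked example (hypothetical numbers): with 4 KB allocation | |
|  * blocks and totalBlocks = 1,000,000, bitmap_len starts as 125,000 bytes | |
|  * and is rounded up to 31 allocation blocks = 126,976 bytes.  For | |
|  * bitmap_st = 0 (so bitmap_off = 0), remaining_bitmap = 126,976 and the | |
|  * returned iosize is MIN(MAXBSIZE, 126976) = 126,976 bytes. | |
|  */ | |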
4631 | ||
316670eb | 4632 | |
39236c6e A |
4633 | |
4634 | ||
316670eb A |
4635 | /* |
4636 | * This function is basically the same as hfs_isallocated, except it's designed for | |
4637 | * use with the red-black tree validation code. It assumes we're only checking whether | |
4638 | * one bit is active, and that we're going to pass in the buf to use, since GenerateTree | |
4639 | * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation. | |
4640 | * | |
4641 | * This should not be called in general purpose scanning code. | |
4642 | */ | |
4643 | int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) { | |
39236c6e | 4644 | |
316670eb A |
4645 | u_int32_t *currentWord; // Pointer to current word within bitmap block |
4646 | u_int32_t bitMask; // Word with given bits already set (ready to test) | |
4647 | u_int32_t firstBit; // Bit index within word of first bit to allocate | |
4648 | u_int32_t numBits; // Number of bits in word to allocate | |
4649 | u_int32_t bitsPerBlock; | |
4650 | uintptr_t blockRef; | |
4651 | u_int32_t wordsPerBlock; | |
4652 | u_int32_t numBlocks = 1; | |
4653 | u_int32_t *buffer = NULL; | |
4654 | ||
4655 | int inuse = 0; | |
4656 | int error; | |
39236c6e A |
4657 | |
4658 | ||
316670eb A |
4659 | if (bp_buf) { |
4660 | /* just use passed-in buffer if avail. */ | |
4661 | buffer = bp_buf; | |
4662 | } | |
4663 | else { | |
4664 | /* | |
4665 | * Pre-read the bitmap block containing the first word of allocation | |
4666 | */ | |
4667 | error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); | |
4668 | if (error) | |
4669 | return (error); | |
4670 | } | |
39236c6e | 4671 | |
316670eb A |
4672 | /* |
4673 | * Initialize currentWord, and wordsLeft. | |
4674 | */ | |
4675 | u_int32_t wordIndexInBlock; | |
39236c6e | 4676 | |
316670eb A |
4677 | bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; |
4678 | wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord; | |
39236c6e | 4679 | |
316670eb A |
4680 | wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; |
4681 | currentWord = buffer + wordIndexInBlock; | |
39236c6e | 4682 | |
316670eb A |
4683 | /* |
4684 | * First test any non word aligned bits. | |
4685 | */ | |
4686 | firstBit = startingBlock % kBitsPerWord; | |
4687 | bitMask = kAllBitsSetInWord >> firstBit; | |
4688 | numBits = kBitsPerWord - firstBit; | |
4689 | if (numBits > numBlocks) { | |
4690 | numBits = numBlocks; | |
4691 | bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); | |
4692 | } | |
4693 | if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { | |
4694 | inuse = 1; | |
4695 | goto Exit; | |
4696 | } | |
4697 | numBlocks -= numBits; | |
4698 | ++currentWord; | |
39236c6e | 4699 | |
316670eb A |
4700 | Exit: |
4701 | if(bp_buf == NULL) { | |
4702 | if (buffer) { | |
4703 | (void)ReleaseBitmapBlock(hfsmp, blockRef, false); | |
4704 | } | |
4705 | } | |
4706 | return (inuse); | |
6d2010ae A |
4707 | |
4708 | ||
6d2010ae | 4709 | |
91447636 A |
4710 | } |
4711 | ||
6d2010ae A |
4712 | /* |
4713 | * This function resets all of the data structures relevant to the | |
4714 | * free extent cache stored in the hfsmount struct. | |
4715 | * | |
4716 | * If we are using the red-black tree code then we need to account for the fact that | |
4717 | * we may encounter situations where we need to jettison the tree. If that is the | |
4718 | * case, then we fail-over to the bitmap scanning logic, but we need to ensure that | |
4719 | * the free ext cache is zeroed before we start using it. | |
d1ecb069 | 4720 | * |
6d2010ae A |
4721 | * We also reset and disable the cache when allocLimit is updated... which |
4722 | * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()). | |
4723 | * It is independent of the type of allocator being used currently. | |
d1ecb069 | 4724 | */ |
6d2010ae | 4725 | void ResetVCBFreeExtCache(struct hfsmount *hfsmp) |
d1ecb069 | 4726 | { |
6d2010ae A |
4727 | int bytes; |
4728 | void *freeExt; | |
1c79356b | 4729 | |
6d2010ae A |
4730 | if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) |
4731 | KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
4732 | ||
4733 | lck_spin_lock(&hfsmp->vcbFreeExtLock); | |
39236c6e | 4734 | |
6d2010ae A |
4735 | /* reset Free Extent Count */ |
4736 | hfsmp->vcbFreeExtCnt = 0; | |
39236c6e | 4737 | |
6d2010ae A |
4738 | /* reset the actual array */ |
4739 | bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor); | |
4740 | freeExt = (void*)(hfsmp->vcbFreeExt); | |
39236c6e | 4741 | |
6d2010ae | 4742 | bzero (freeExt, bytes); |
39236c6e | 4743 | |
6d2010ae A |
4744 | lck_spin_unlock(&hfsmp->vcbFreeExtLock); |
4745 | ||
4746 | if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) | |
4747 | KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
4748 | ||
4749 | return; | |
4750 | } | |
4751 | ||
4752 | /* | |
4753 | * This function is used to inform the allocator if we have to effectively shrink | |
4754 | * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs. | |
4755 | * | |
4756 | * The bitmap lock must be held when calling this function. This function also modifies the | |
4757 | * allocLimit field in the hfs mount point structure in the general case. | |
4758 | * | |
4759 | * In the shrinking case, we'll have to remove all free extents from the red-black | |
4760 | * tree past the specified offset new_end_block. In the growth case, we'll have to force | |
4761 | * a re-scan of the new allocation blocks from our current allocLimit to the new end block. | |
4762 | * | |
4763 | * new_end_block represents the total number of blocks available for allocation in the resized | |
4764 | * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it | |
4765 | * will be out of the (0, n-1) range that are indexable in the bitmap. | |
4766 | * | |
4767 | * Returns 0 on success | |
4768 | * errno on failure | |
4769 | */ | |
4770 | __private_extern__ | |
4771 | u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) { | |
39236c6e | 4772 | |
6d2010ae | 4773 | /* |
39236c6e | 4774 | * Update allocLimit to the argument specified |
6d2010ae A |
4775 | */ |
4776 | hfsmp->allocLimit = new_end_block; | |
4777 | ||
4778 | /* Invalidate the free extent cache completely so that | |
4779 | * it does not have any extents beyond end of current | |
4780 | * volume. | |
4781 | */ | |
4782 | ResetVCBFreeExtCache(hfsmp); | |
4783 | ||
39236c6e A |
4784 | /* Force a rebuild of the summary table. */ |
4785 | (void) hfs_rebuild_summary (hfsmp); | |
6d2010ae A |
4786 | |
4787 | return 0; | |
4788 | ||
4789 | } | |
4790 | ||
4791 | ||
316670eb A |
4792 | /* |
4793 | * Remove an extent from the list of free extents. | |
4794 | * | |
4795 | * This is a low-level routine. It does not handle overlaps or splitting; | |
4796 | * that is the responsibility of the caller. The extent at the given index | |
4797 | * is removed from the list, and any following extents are shifted down | |
4798 | * one slot to fill the hole. | |
4799 | * | |
4800 | * Inputs: | |
4801 | * hfsmp - mount point structure | |
4802 | * index - index of the extent to remove from the free extent cache | |
4803 | * | |
4804 | * Result: | |
4805 | * None. vcbFreeExtCnt is decremented; an out-of-range index is rejected with a diagnostic. | |
4806 | */ | |
4807 | static void remove_free_extent_list(struct hfsmount *hfsmp, int index) | |
4808 | { | |
4809 | if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) { | |
4810 | if (ALLOC_DEBUG) | |
4811 | panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt); | |
4812 | else | |
4813 | printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt); | |
4814 | return; | |
4815 | } | |
4816 | int shift_count = hfsmp->vcbFreeExtCnt - index - 1; | |
4817 | if (shift_count > 0) { | |
4818 | memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0])); | |
4819 | } | |
4820 | hfsmp->vcbFreeExtCnt--; | |
4821 | } | |
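| /* | |
|  * Editor's example: with vcbFreeExtCnt == 5 and index == 1, shift_count is | |
|  * 3, so entries 2..4 are moved down into slots 1..3 and the count drops to | |
|  * 4.  Removing the last entry (index == 4) gives shift_count == 0 and only | |
|  * decrements the count. | |
|  */ | |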
4822 | ||
4823 | ||
4824 | /* | |
4825 | * Add an extent to the list of free extents. | |
4826 | * | |
4827 | * This is a low-level routine. It does not handle overlaps or coalescing; | |
4828 | * that is the responsibility of the caller. This routine *does* make | |
4829 | * sure that the extent it is adding is inserted in the correct location. | |
4830 | * If the list is full, this routine will handle either removing the last | |
4831 | * extent in the list to make room for the new extent, or ignoring the | |
4832 | * new extent if it is "worse" than the last extent in the list. | |
4833 | * | |
4834 | * Inputs: | |
4835 | * startBlock - Start of extent to add | |
4836 | * blockCount - Number of blocks in extent to add | |
4837 | * | |
4838 | * Result: | |
4839 | * The index where the extent was inserted, or kMaxFreeExtents | |
4840 | * if the extent was not inserted (the list was full, and the extent | |
4841 | * being added was "worse" than everything in the list). | |
4842 | */ | |
4843 | static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) | |
4844 | { | |
4845 | uint32_t i; | |
39236c6e | 4846 | |
316670eb A |
4847 | /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */ |
4848 | if (ALLOC_DEBUG) { | |
4849 | uint32_t endBlock = startBlock + blockCount; | |
4850 | for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { | |
4851 | if (endBlock < hfsmp->vcbFreeExt[i].startBlock || | |
39236c6e A |
4852 | startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) { |
4853 | continue; | |
316670eb A |
4854 | } |
4855 | panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d", | |
39236c6e | 4856 | hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i); |
316670eb A |
4857 | } |
4858 | } | |
4859 | ||
4860 | /* Figure out what index the new extent should be inserted at. */ | |
4861 | for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { | |
4862 | if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { | |
4863 | /* The list is sorted by increasing offset. */ | |
4864 | if (startBlock < hfsmp->vcbFreeExt[i].startBlock) { | |
4865 | break; | |
4866 | } | |
4867 | } else { | |
4868 | /* The list is sorted by decreasing size. */ | |
4869 | if (blockCount > hfsmp->vcbFreeExt[i].blockCount) { | |
4870 | break; | |
4871 | } | |
4872 | } | |
4873 | } | |
39236c6e | 4874 | |
316670eb A |
4875 | /* When we get here, i is the index where the extent should be inserted. */ |
4876 | if (i == kMaxFreeExtents) { | |
4877 | /* | |
4878 | * The new extent is worse than anything already in the list, | |
4879 | * and the list is full, so just ignore the extent to be added. | |
4880 | */ | |
4881 | return i; | |
4882 | } | |
39236c6e | 4883 | |
316670eb A |
4884 | /* |
4885 | * Grow the list (if possible) to make room for an insert. | |
4886 | */ | |
4887 | if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) | |
4888 | hfsmp->vcbFreeExtCnt++; | |
39236c6e | 4889 | |
316670eb A |
4890 | /* |
4891 | * If we'll be keeping any extents after the insert position, then shift them. | |
4892 | */ | |
4893 | int shift_count = hfsmp->vcbFreeExtCnt - i - 1; | |
4894 | if (shift_count > 0) { | |
4895 | memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0])); | |
4896 | } | |
39236c6e | 4897 | |
316670eb A |
4898 | /* Finally, store the new extent at its correct position. */ |
4899 | hfsmp->vcbFreeExt[i].startBlock = startBlock; | |
4900 | hfsmp->vcbFreeExt[i].blockCount = blockCount; | |
4901 | return i; | |
4902 | } | |
4903 | ||
4904 | ||
6d2010ae A |
4905 | /* |
4906 | * Remove an entry from free extent cache after it has been allocated. | |
4907 | * | |
316670eb A |
4908 | * This is a high-level routine. It handles removing a portion of a |
4909 | * cached extent, potentially splitting it into two (if the cache was | |
4910 | * already full, throwing away the extent that would sort last). It | |
4911 | * also handles removing an extent that overlaps multiple extents in | |
4912 | * the cache. | |
6d2010ae A |
4913 | * |
4914 | * Inputs: | |
316670eb A |
4915 | * hfsmp - mount point structure |
4916 | * startBlock - starting block of the extent to be removed. | |
4917 | * blockCount - number of blocks of the extent to be removed. | |
6d2010ae A |
4918 | */ |
4919 | static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) | |
4920 | { | |
316670eb A |
4921 | u_int32_t i, insertedIndex; |
4922 | u_int32_t currentStart, currentEnd, endBlock; | |
6d2010ae | 4923 | int extentsRemoved = 0; |
39236c6e | 4924 | |
6d2010ae A |
4925 | if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) |
4926 | KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); | |
39236c6e | 4927 | |
316670eb | 4928 | endBlock = startBlock + blockCount; |
39236c6e | 4929 | |
6d2010ae | 4930 | lck_spin_lock(&hfsmp->vcbFreeExtLock); |
39236c6e | 4931 | |
316670eb A |
4932 | /* |
4933 | * Iterate over all of the extents in the free extent cache, removing or | |
4934 | * updating any entries that overlap with the input extent. | |
4935 | */ | |
4936 | for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { | |
4937 | currentStart = hfsmp->vcbFreeExt[i].startBlock; | |
4938 | currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount; | |
39236c6e | 4939 | |
316670eb A |
4940 | /* |
4941 | * If the current extent is entirely before or entirely after the | |
4942 | * extent to be removed, then we keep it as-is. | |
6d2010ae | 4943 | */ |
316670eb A |
4944 | if (currentEnd <= startBlock || currentStart >= endBlock) { |
4945 | continue; | |
4946 | } | |
39236c6e | 4947 | |
316670eb A |
4948 | /* |
4949 | * If the extent being removed entirely contains the current extent, | |
4950 | * then remove the current extent. | |
4951 | */ | |
4952 | if (startBlock <= currentStart && endBlock >= currentEnd) { | |
4953 | remove_free_extent_list(hfsmp, i); | |
39236c6e | 4954 | |
316670eb A |
4955 | /* |
4956 | * We just removed the extent at index i. The extent at | |
4957 | * index i+1 just got shifted to index i. So decrement i | |
4958 | * to undo the loop's "++i", and the next iteration will | |
4959 | * examine index i again, which contains the next extent | |
4960 | * in the list. | |
6d2010ae | 4961 | */ |
316670eb A |
4962 | --i; |
4963 | ++extentsRemoved; | |
4964 | continue; | |
6d2010ae | 4965 | } |
39236c6e | 4966 | |
316670eb A |
4967 | /* |
4968 | * If the extent being removed is strictly "in the middle" of the | |
4969 | * current extent, then we need to split the current extent into | |
4970 | * two discontiguous extents (the "head" and "tail"). The good | |
4971 | * news is that we don't need to examine any other extents in | |
4972 | * the list. | |
6d2010ae | 4973 | */ |
316670eb A |
4974 | if (startBlock > currentStart && endBlock < currentEnd) { |
4975 | remove_free_extent_list(hfsmp, i); | |
4976 | add_free_extent_list(hfsmp, currentStart, startBlock - currentStart); | |
4977 | add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock); | |
4978 | break; | |
4979 | } | |
39236c6e | 4980 | |
316670eb A |
4981 | /* |
4982 | * The only remaining possibility is that the extent to be removed | |
4983 | * overlaps the start or end (but not both!) of the current extent. | |
4984 | * So we need to replace the current extent with a shorter one. | |
4985 | * | |
4986 | * The only tricky part is that the updated extent might be at a | |
4987 | * different index than the original extent. If the updated extent | |
4988 | * was inserted after the current extent, then we need to re-examine | |
4989 | * the entry at index i, since it now contains the extent that was | |
4990 | * previously at index i+1. If the updated extent was inserted | |
4991 | * before or at the same index as the removed extent, then the | |
4992 | * following extents haven't changed position. | |
4993 | */ | |
4994 | remove_free_extent_list(hfsmp, i); | |
4995 | if (startBlock > currentStart) { | |
4996 | /* Remove the tail of the current extent. */ | |
4997 | insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart); | |
4998 | } else { | |
4999 | /* Remove the head of the current extent. */ | |
5000 | insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock); | |
5001 | } | |
5002 | if (insertedIndex > i) { | |
5003 | --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */ | |
5004 | } | |
6d2010ae | 5005 | } |
39236c6e | 5006 | |
6d2010ae | 5007 | lck_spin_unlock(&hfsmp->vcbFreeExtLock); |
39236c6e | 5008 | |
6d2010ae | 5009 | sanity_check_free_ext(hfsmp, 0); |
39236c6e | 5010 | |
6d2010ae A |
5011 | if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) |
5012 | KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0); | |
39236c6e | 5013 | |
d1ecb069 A |
5014 | return; |
5015 | } | |
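| /* | |
|  * Editor's example of the "middle" split handled above: if the cache holds | |
|  * the free extent (start 100, count 50) and blocks 120..129 get allocated, | |
|  * remove_free_extent_cache(hfsmp, 120, 10) replaces that entry with the | |
|  * head (100, 20) and the tail (130, 20). | |
|  */ | |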
5016 | ||
316670eb | 5017 | |
6d2010ae | 5018 | /* |
316670eb | 5019 | * Add an entry to free extent cache after it has been deallocated. |
d1ecb069 | 5020 | * |
316670eb A |
5021 | * This is a high-level routine. It will merge overlapping or contiguous |
5022 | * extents into a single, larger extent. | |
5023 | * | |
5024 | * If the extent provided has blocks beyond current allocLimit, it is | |
5025 | * clipped to allocLimit (so that we won't accidentally find and allocate | |
5026 | * space beyond allocLimit). | |
d1ecb069 | 5027 | * |
6d2010ae | 5028 | * Inputs: |
316670eb A |
5029 | * hfsmp - mount point structure |
5030 | * startBlock - starting block of the extent to be added. | |
5031 | * blockCount - number of blocks of the extent to be added. | |
6d2010ae A |
5032 | * |
5033 | * Returns: | |
316670eb A |
5034 | * true - if the extent was added successfully to the list |
5035 | * false - if the extent was not added to the list, maybe because | |
5036 | * the extent was beyond allocLimit, or is not best | |
5037 | * candidate to be put in the cache. | |
d1ecb069 | 5038 | */ |
316670eb | 5039 | static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) |
d1ecb069 | 5040 | { |
6d2010ae | 5041 | Boolean retval = false; |
316670eb A |
5042 | uint32_t endBlock; |
5043 | uint32_t currentEnd; | |
5044 | uint32_t i; | |
39236c6e | 5045 | |
6d2010ae A |
5046 | if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) |
5047 | KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); | |
39236c6e | 5048 | |
6d2010ae A |
5049 | /* No need to add extent that is beyond current allocLimit */ |
5050 | if (startBlock >= hfsmp->allocLimit) { | |
5051 | goto out_not_locked; | |
5052 | } | |
39236c6e | 5053 | |
6d2010ae A |
5054 | /* If end of the free extent is beyond current allocLimit, clip the extent */ |
5055 | if ((startBlock + blockCount) > hfsmp->allocLimit) { | |
5056 | blockCount = hfsmp->allocLimit - startBlock; | |
5057 | } | |
39236c6e | 5058 | |
6d2010ae | 5059 | lck_spin_lock(&hfsmp->vcbFreeExtLock); |
39236c6e | 5060 | |
316670eb A |
5061 | /* |
5062 | * Make a pass through the free extent cache, looking for known extents that | |
5063 | * overlap or are contiguous with the extent to be added. We'll remove those | |
5064 | * extents from the cache, and incorporate them into the new extent to be added. | |
6d2010ae | 5065 | */ |
316670eb A |
5066 | endBlock = startBlock + blockCount; |
5067 | for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) { | |
5068 | currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount; | |
5069 | if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) { | |
5070 | /* Extent i does not overlap and is not contiguous, so keep it. */ | |
5071 | continue; | |
6d2010ae | 5072 | } else { |
316670eb A |
5073 | /* We need to remove extent i and combine it with the input extent. */ |
5074 | if (hfsmp->vcbFreeExt[i].startBlock < startBlock) | |
5075 | startBlock = hfsmp->vcbFreeExt[i].startBlock; | |
5076 | if (currentEnd > endBlock) | |
5077 | endBlock = currentEnd; | |
39236c6e | 5078 | |
316670eb A |
5079 | remove_free_extent_list(hfsmp, i); |
5080 | /* | |
5081 | * We just removed the extent at index i. The extent at | |
5082 | * index i+1 just got shifted to index i. So decrement i | |
5083 | * to undo the loop's "++i", and the next iteration will | |
5084 | * examine index i again, which contains the next extent | |
5085 | * in the list. | |
5086 | */ | |
5087 | --i; | |
6d2010ae A |
5088 | } |
5089 | } | |
316670eb | 5090 | add_free_extent_list(hfsmp, startBlock, endBlock - startBlock); |
39236c6e | 5091 | |
6d2010ae | 5092 | lck_spin_unlock(&hfsmp->vcbFreeExtLock); |
39236c6e | 5093 | |
6d2010ae A |
5094 | out_not_locked: |
5095 | sanity_check_free_ext(hfsmp, 0); | |
39236c6e | 5096 | |
6d2010ae A |
5097 | if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) |
5098 | KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0); | |
39236c6e | 5099 | |
d1ecb069 A |
5100 | return retval; |
5101 | } | |
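| /* | |
|  * Editor's example of the merge pass above: if the cache holds (100, 20) | |
|  * and (200, 10), then add_free_extent_cache(hfsmp, 120, 80) absorbs both | |
|  * entries (the first ends exactly at block 120, the second starts exactly | |
|  * at block 200) and inserts the single combined extent (100, 110). | |
|  */ | |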
6d2010ae A |
5102 | |
5103 | /* Debug function to check if the free extent cache is good or not */ | |
5104 | static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) | |
5105 | { | |
5106 | u_int32_t i, j; | |
5107 | ||
39236c6e A |
5108 | /* Do not do anything if debug is not on */ |
5109 | if (ALLOC_DEBUG == 0) { | |
6d2010ae A |
5110 | return; |
5111 | } | |
5112 | ||
5113 | lck_spin_lock(&hfsmp->vcbFreeExtLock); | |
39236c6e | 5114 | |
316670eb A |
5115 | if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents) |
5116 | panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt); | |
39236c6e | 5117 | |
6d2010ae A |
5118 | /* |
5119 | * Iterate the Free extent cache and ensure no entries are bogus or refer to | |
5120 | * allocated blocks. | |
5121 | */ | |
5122 | for(i=0; i < hfsmp->vcbFreeExtCnt; i++) { | |
5123 | u_int32_t start, nblocks; | |
5124 | ||
5125 | start = hfsmp->vcbFreeExt[i].startBlock; | |
5126 | nblocks = hfsmp->vcbFreeExt[i].blockCount; | |
5127 | ||
6d2010ae A |
5128 | /* Check if any of the blocks in free extent cache are allocated. |
5129 | * This should not be enabled always because it might take | |
5130 | * very long for large extents that get added to the list. | |
5131 | * | |
5132 | * We have to drop vcbFreeExtLock while we call hfs_isallocated | |
5133 | * because it is going to do I/O. Note that the free extent | |
5134 | * cache could change. That's a risk we take when using this | |
5135 | * debugging code. (Another alternative would be to try to | |
5136 | * detect when the free extent cache changed, and perhaps | |
5137 | * restart if the list changed while we dropped the lock.) | |
5138 | */ | |
5139 | if (check_allocated) { | |
5140 | lck_spin_unlock(&hfsmp->vcbFreeExtLock); | |
5141 | if (hfs_isallocated(hfsmp, start, nblocks)) { | |
5142 | panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n", | |
39236c6e | 5143 | hfsmp, i, start, nblocks); |
6d2010ae A |
5144 | } |
5145 | lck_spin_lock(&hfsmp->vcbFreeExtLock); | |
5146 | } | |
5147 | ||
5148 | /* Check if any part of the extent is beyond allocLimit */ | |
5149 | if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) { | |
5150 | panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n", | |
5151 | hfsmp, i, start, nblocks, hfsmp->allocLimit); | |
5152 | } | |
5153 | ||
5154 | /* Check if there are any duplicate start blocks */ | |
5155 | for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) { | |
5156 | if (start == hfsmp->vcbFreeExt[j].startBlock) { | |
5157 | panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n", | |
39236c6e A |
5158 | hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, |
5159 | hfsmp->vcbFreeExt[j].blockCount); | |
6d2010ae A |
5160 | } |
5161 | } | |
5162 | ||
5163 | /* Check if the entries are out of order */ | |
5164 | if ((i+1) != hfsmp->vcbFreeExtCnt) { | |
5165 | if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { | |
5166 | /* sparse devices are sorted by starting block number (ascending) */ | |
5167 | if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) { | |
5168 | panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n", | |
39236c6e A |
5169 | hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, |
5170 | hfsmp->vcbFreeExt[i+1].blockCount); | |
6d2010ae A |
5171 | } |
5172 | } else { | |
5173 | /* normally sorted by block count (descending) */ | |
5174 | if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) { | |
5175 | panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n", | |
39236c6e A |
5176 | hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, |
5177 | hfsmp->vcbFreeExt[i+1].blockCount); | |
6d2010ae A |
5178 | } |
5179 | } | |
5180 | } | |
5181 | } | |
5182 | lck_spin_unlock(&hfsmp->vcbFreeExtLock); | |
5183 | } | |
39236c6e | 5184 | |
04b8595b A |
5185 | #define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit)) |
5186 | #define kHighBitInDoubleWordMask 0x8000000000000000ull | |
5187 | ||
5188 | static int clzll(uint64_t x) | |
5189 | { | |
5190 | if (x == 0) | |
5191 | return 64; | |
5192 | else | |
5193 | return __builtin_clzll(x); | |
5194 | } | |
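| /* | |
|  * Editor's note: clzll(0) is defined as 64 here because __builtin_clzll is | |
|  * undefined for a zero argument.  Example values (editor's sketch): | |
|  *     clzll(0x0010000000000000ull)  == 11 | |
|  *     BIT_RIGHT_MASK(4)             == 0x0fffffffffffffffull | |
|  * i.e. BIT_RIGHT_MASK(bit) keeps only the low-order (64 - bit) bits. | |
|  */ | |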
5195 | ||
5196 | #if !HFS_ALLOC_TEST | |
5197 | ||
5198 | static errno_t get_more_bits(bitmap_context_t *bitmap_ctx) | |
5199 | { | |
5200 | uint32_t start_bit; | |
5201 | uint32_t iosize = 0; | |
5202 | uint32_t byte_offset; | |
5203 | uint32_t last_bitmap_block; | |
5204 | int error; | |
5205 | struct hfsmount *hfsmp = bitmap_ctx->hfsmp; | |
5206 | #if !HFS_ALLOC_TEST | |
5207 | uint64_t lock_elapsed; | |
5208 | #endif | |
5209 | ||
5210 | ||
5211 | if (bitmap_ctx->bp) | |
5212 | ReleaseScanBitmapRange(bitmap_ctx->bp); | |
5213 | ||
5214 | if (msleep(NULL, NULL, PINOD | PCATCH, | |
5215 | "hfs_fsinfo", NULL) == EINTR) { | |
5216 | return EINTR; | |
5217 | } | |
5218 | ||
5219 | #if !HFS_ALLOC_TEST | |
5220 | /* | |
5221 | * Let someone else use the allocation map once we have held it for more than HFS_FSINFO_MAX_LOCKHELD_TIME. | |
5222 | * lock_start is initialized in hfs_find_free_extents(). | |
5223 | */ | |
5224 | absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed); | |
5225 | ||
5226 | if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) { | |
5227 | ||
5228 | hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags); | |
5229 | ||
5230 | /* add tsleep here to force context switch and fairness */ | |
5231 | tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1); | |
5232 | ||
5233 | hfs_journal_lock(hfsmp); | |
5234 | ||
5235 | /* Flush the journal and wait for all I/Os to finish up */ | |
5236 | error = hfs_journal_flush(hfsmp, TRUE); | |
5237 | if (error) { | |
5238 | hfs_journal_unlock(hfsmp); | |
5239 | return error; | |
5240 | } | |
5241 | ||
5242 | /* | |
5243 | * Take bitmap lock to ensure it is not being modified while journal is still held. | |
5244 | * Since we are reading larger than normal blocks from the bitmap, which | |
5245 | * might confuse other parts of the bitmap code using normal blocks, we | |
5246 | * take exclusive lock here. | |
5247 | */ | |
5248 | bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); | |
5249 | ||
5250 | bitmap_ctx->lock_start = mach_absolute_time(); | |
5251 | ||
5252 | /* Release the journal lock */ | |
5253 | hfs_journal_unlock(hfsmp); | |
5254 | ||
5255 | /* | |
5256 | * Bitmap is read in large block size (up to 1MB), | |
5257 | * unlike the runtime which reads the bitmap in the | |
5258 | * 4K block size. If the bitmap is read by both ways | |
5259 | * at the same time, it can result in multiple buf_t with | |
5260 | * different sizes and potentially cause data corruption. | |
5261 | * To avoid this, we invalidate all the existing buffers | |
5262 | * associated with the bitmap vnode. | |
5263 | */ | |
5264 | error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); | |
5265 | if (error) { | |
5266 | /* hfs_systemfile_unlock will be called in the caller */ | |
5267 | return error; | |
5268 | } | |
5269 | } | |
5270 | #endif | |
5271 | ||
5272 | start_bit = bitmap_ctx->run_offset; | |
5273 | ||
5274 | if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) { | |
5275 | bitmap_ctx->chunk_end = 0; | |
5276 | bitmap_ctx->bp = NULL; | |
5277 | bitmap_ctx->bitmap = NULL; | |
5278 | return 0; | |
5279 | } | |
5280 | ||
5281 | assert(start_bit % 8 == 0); | |
5282 | ||
5283 | /* | |
5284 | * Compute how much I/O we should generate here. | |
5285 | * hfs_scan_range_size will validate that the start bit | |
5286 | * converted into a byte offset into the bitmap file, | |
5287 | * is aligned on a VBMIOSize boundary. | |
5288 | */ | |
5289 | error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize); | |
5290 | if (error) | |
5291 | return error; | |
5292 | ||
5293 | /* hfs_scan_range_size should have verified startbit. Convert it to bytes */ | |
5294 | byte_offset = start_bit / kBitsPerByte; | |
5295 | ||
5296 | /* | |
5297 | * When the journal replays blocks, it does so by writing directly to the disk | |
5298 | * device (bypassing any filesystem vnodes and such). When it finishes its I/Os | |
5299 | * it also immediately re-reads and invalidates the range covered by the bp so | |
5300 | * it does not leave anything lingering in the cache (for iosize reasons). | |
5301 | * | |
5302 | * As such, it is safe to do large I/Os here with ReadBitmapRange. | |
5303 | * | |
5304 | * NOTE: It is not recommended, but it is possible to call the function below | |
5305 | * on sections of the bitmap that may be in core already as long as the pages are not | |
5306 | * dirty. In that case, we'd notice that something starting at that | |
5307 | * logical block of the bitmap exists in the metadata cache, and we'd check | |
5308 | * if the iosize requested is the same as what was already allocated for it. | |
5309 | * Odds are pretty good we're going to request something larger. In that case, | |
5310 | * we just free the existing memory associated with the buf and reallocate a | |
5311 | * larger range. This function should immediately invalidate it as soon as we're | |
5312 | * done scanning, so this shouldn't cause any coherency issues. | |
5313 | */ | |
5314 | error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp); | |
5315 | if (error) | |
5316 | return error; | |
5317 | ||
5318 | /* | |
5319 | * At this point, we have a giant wired buffer that represents some portion of | |
5320 | * the bitmap file that we want to analyze. We may not have gotten all 'iosize' | |
5321 | * bytes though, so clip our ending bit to what we actually read in. | |
5322 | */ | |
5323 | last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte; | |
5324 | ||
5325 | /* Cap the last block to the total number of blocks if required */ | |
5326 | if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks) | |
5327 | last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks; | |
5328 | ||
5329 | bitmap_ctx->chunk_current = 0; // new chunk of bitmap | |
5330 | bitmap_ctx->chunk_end = last_bitmap_block - start_bit; | |
5331 | ||
5332 | return 0; | |
5333 | } | |
5334 | ||
5335 | #endif // !HFS_ALLOC_TEST | |
5336 | ||
5337 | // Returns number of contiguous bits set at start | |
5338 | static int bit_count_set(void *bitmap, int start, int end) | |
5339 | { | |
5340 | if (start == end) | |
5341 | return 0; | |
5342 | ||
5343 | assert(end > start); | |
5344 | ||
5345 | const int start_bit = start & 63; | |
5346 | const int end_bit = end & 63; | |
5347 | ||
5348 | uint64_t *p = (uint64_t *)bitmap + start / 64; | |
5349 | uint64_t x = ~OSSwapBigToHostInt64(*p); | |
5350 | ||
5351 | if ((start & ~63) == (end & ~63)) { | |
5352 | // Start and end in same 64 bits | |
5353 | x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit); | |
5354 | return clzll(x) - start_bit; | |
5355 | } | |
5356 | ||
5357 | // Deal with initial unaligned bit | |
5358 | x &= BIT_RIGHT_MASK(start_bit); | |
5359 | ||
5360 | if (x) | |
5361 | return clzll(x) - start_bit; | |
5362 | ||
5363 | // Go fast | |
5364 | ++p; | |
5365 | int count = 64 - start_bit; | |
5366 | int nquads = (end - end_bit - start - 1) / 64; | |
5367 | ||
5368 | while (nquads--) { | |
5369 | if (*p != 0xffffffffffffffffull) { | |
5370 | x = ~OSSwapBigToHostInt64(*p); | |
5371 | return count + clzll(x); | |
5372 | } | |
5373 | ++p; | |
5374 | count += 64; | |
5375 | } | |
5376 | ||
5377 | if (end_bit) { | |
5378 | x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit); | |
5379 | count += clzll(x); | |
5380 | } | |
5381 | ||
5382 | return count; | |
5383 | } | |
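| /* | |
|  * Editor's worked example for bit_count_set(): if the first bitmap word | |
|  * has its first three bits set in on-disk (big-endian) order, i.e. the | |
|  * swapped host value is 0xE000000000000000, then for start = 0, end = 64 | |
|  * the inverted word is 0x1FFF... and clzll() yields 3: the run of set | |
|  * bits at 'start' is three blocks long. | |
|  */ | |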
5384 | ||
5385 | /* Returns the length of a run of cleared bits: | |
5386 | * bitmap is a single chunk of memory being examined | |
5387 | * start: the start bit relative to the current buffer to be examined; start is inclusive. | |
5388 | * end: the end bit relative to the current buffer to be examined; end is not inclusive. | |
5389 | */ | |
5390 | static int bit_count_clr(void *bitmap, int start, int end) | |
5391 | { | |
5392 | if (start == end) | |
5393 | return 0; | |
5394 | ||
5395 | assert(end > start); | |
5396 | ||
5397 | const int start_bit = start & 63; | |
5398 | const int end_bit = end & 63; | |
5399 | ||
5400 | uint64_t *p = (uint64_t *)bitmap + start / 64; | |
5401 | uint64_t x = OSSwapBigToHostInt64(*p); | |
5402 | ||
5403 | if ((start & ~63) == (end & ~63)) { | |
5404 | // Start and end in same 64 bits | |
5405 | x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit); | |
5406 | ||
5407 | return clzll(x) - start_bit; | |
5408 | } | |
5409 | ||
5410 | // Deal with initial unaligned bit | |
5411 | x &= BIT_RIGHT_MASK(start_bit); | |
5412 | ||
5413 | if (x) | |
5414 | return clzll(x) - start_bit; | |
5415 | ||
5416 | // Go fast | |
5417 | ++p; | |
5418 | int count = 64 - start_bit; | |
5419 | int nquads = (end - end_bit - start - 1) / 64; | |
5420 | ||
5421 | while (nquads--) { | |
5422 | if (*p) { | |
5423 | x = OSSwapBigToHostInt64(*p); | |
5424 | return count + clzll(x); | |
5425 | } | |
5426 | ++p; | |
5427 | count += 64; | |
5428 | } | |
5429 | ||
5430 | if (end_bit) { | |
5431 | x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit); | |
5432 | ||
5433 | count += clzll(x); | |
5434 | } | |
5435 | ||
5436 | return count; | |
5437 | } | |
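/*
 * Hedged worked example for bit_count_set()/bit_count_clr() above (an
 * illustrative sketch, not built into the kernel).  It assumes that
 * BIT_RIGHT_MASK(n) is a mask with the 'n' most significant bits clear
 * (i.e. ~0ULL >> n), that clzll() counts leading zero bits of a 64-bit
 * value, and that OSSwapHostToBigInt64() is available here to build an
 * on-disk (big-endian) word; allocation bit 0 maps to the most
 * significant bit of the swapped word.
 */
#if 0
static void example_bit_count_selfcheck(void)
{
	/*
	 * Big-endian word whose first byte is 0xE1 = 1110 0001:
	 * allocation bits 0..2 are set, bits 3..6 are clear, bit 7 is set.
	 */
	uint64_t word = OSSwapHostToBigInt64(0xE100000000000000ull);

	/*
	 * bit_count_clr(&word, 3, 10), single-word case:
	 *   x (bit 0 == MSB)        = 1110 0001 0000 ...
	 *   x & BIT_RIGHT_MASK(3)   = 0000 0001 0000 ...  ignore bits before 'start'
	 *     | BIT_RIGHT_MASK(10)  = 0000 0001 0011 ...  force ones at/after 'end'
	 *   clzll(x) - start_bit    = 7 - 3 = 4 clear bits, i.e. blocks 3..6
	 */
	assert(bit_count_clr(&word, 3, 10) == 4);

	/* bit_count_set() inverts the word first, so it sees the 3 set bits 0..2. */
	assert(bit_count_set(&word, 0, 10) == 3);
}
#endif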
5438 | ||
5439 | #if !HFS_ALLOC_TEST | |
5440 | static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set) | |
5441 | { | |
5442 | uint32_t end, start_summary_bit, end_summary_bit; | |
5443 | errno_t error = 0; | |
5444 | ||
5445 | if (count == 0) | |
5446 | goto out; | |
5447 | ||
5448 | if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE)) | |
5449 | return 0; | |
5450 | ||
5451 | if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) { | |
5452 | error = EINVAL; | |
5453 | goto out; | |
5454 | } | |
5455 | ||
5456 | end = start + count - 1; | |
5457 | if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) { | |
5458 | error = EINVAL; | |
5459 | goto out; | |
5460 | } | |
5461 | ||
5462 | // If the summary table bit was already updated for a free block earlier in this run, leave it alone. | |
5463 | if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set) | |
5464 | start_summary_bit++; | |
5465 | ||
5466 | for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++) | |
5467 | hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set); | |
5468 | ||
5469 | if (!set) | |
5470 | bitmap_ctx->last_free_summary_bit = end_summary_bit; | |
5471 | ||
5472 | out: | |
5473 | return error; | |
5474 | ||
5475 | } | |
5476 | #endif //!HFS_ALLOC_TEST | |
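/*
 * Rough sketch of the summary-table mapping relied on above (illustration
 * only; the real mapping is whatever hfs_get_summary_index() implements).
 * The working assumption is that each summary bit covers one bitmap block's
 * worth of allocation blocks, and that a set summary bit means "no free
 * allocation blocks in that portion of the bitmap".  The 4 KiB figure below
 * is an assumption for the sketch, not a value taken from this file.
 */
#if 0
static uint32_t example_summary_bit_for_block(uint32_t alloc_block)
{
	/* Allocation blocks tracked per summary bit: one assumed 4 KiB bitmap block. */
	const uint32_t blocks_per_summary_bit = 4096 * kBitsPerByte;

	return alloc_block / blocks_per_summary_bit;
}
#endif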
5477 | ||
5478 | /* | |
5479 | * Read the bitmap into memory in chunks and find a run of cleared/set bits; | |
5480 | * the run can extend across chunk boundaries. | |
5481 | * Pass bit_count_clr to count a run of cleared bits. | |
5482 | * Pass bit_count_set to count a run of set bits. | |
5483 | */ | |
5484 | static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int, int), uint32_t *bit_count) | |
5485 | { | |
5486 | int count; | |
5487 | errno_t error = 0; | |
5488 | ||
5489 | *bit_count = 0; | |
5490 | ||
5491 | do { | |
5492 | if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) { | |
5493 | if ((error = get_more_bits(bitmap_ctx)) != 0) | |
5494 | goto out; | |
5495 | } | |
5496 | ||
5497 | if (bitmap_ctx->chunk_end == 0) | |
5498 | break; | |
5499 | ||
5500 | count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end); | |
5501 | ||
5502 | bitmap_ctx->run_offset += count; | |
5503 | bitmap_ctx->chunk_current += count; | |
5504 | *bit_count += count; | |
5505 | ||
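		/*
		 * Loop again only if the run consumed the current chunk all the
		 * way to its end (it may continue into the next chunk fetched by
		 * get_more_bits()) and the chunk contributed at least one bit;
		 * a zero count means the run ended right at the chunk boundary.
		 */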
5506 | } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count); | |
5507 | ||
5508 | out: | |
5509 | return error; | |
5510 | ||
5511 | } | |
5512 | ||
5513 | // Returns the number of contiguous clear bits at the current run offset | |
5514 | static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count) | |
5515 | { | |
5516 | return hfs_bit_count(bitmap_ctx, bit_count_clr, count); | |
5517 | } | |
5518 | ||
5519 | // Returns the number of contiguous set bits at the current run offset | |
5520 | static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count) | |
5521 | { | |
5522 | return hfs_bit_count(bitmap_ctx, bit_count_set, count); | |
5523 | } | |
5524 | ||
5525 | static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx) | |
5526 | { | |
5527 | return bitmap_ctx->run_offset; | |
5528 | } | |
5529 | ||
5530 | /* | |
5531 | * Perform a full scan of the bitmap file. | |
5532 | * Note: during the scan of the bitmap file, it may drop and reacquire the | |
5533 | * bitmap lock to let someone else use the bitmap, for fairness. | |
5534 | * Currently it is used by HFS_GET_FSINFO statistics gathering, which | |
5535 | * is run while other processes might perform HFS operations. | |
5536 | */ | |
5537 | ||
5538 | errno_t hfs_find_free_extents(struct hfsmount *hfsmp, | |
5539 | void (*callback)(void *data, off_t free_extent_size), void *callback_arg) | |
5540 | { | |
5541 | struct bitmap_context bitmap_ctx; | |
5542 | uint32_t count; | |
5543 | errno_t error = 0; | |
5544 | ||
5545 | if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) { | |
5546 | error = hfs_init_summary(hfsmp); | |
5547 | if (error) | |
5548 | return error; | |
5549 | } | |
5550 | ||
5551 | bzero(&bitmap_ctx, sizeof(struct bitmap_context)); | |
5552 | ||
5553 | /* | |
5554 | * The journal maintains a list of recently deallocated blocks to | |
5555 | * issue DKIOCUNMAPs when the corresponding journal transaction is | |
5556 | * flushed to the disk. To avoid any race conditions, we only | |
5557 | * want one active trim list. Therefore we make sure that the | |
5558 | * journal trim list is sync'ed, empty, and not modifiable for | |
5559 | * the duration of our scan. | |
5560 | * | |
5561 | * Take the journal lock before flushing the journal to the disk. | |
5562 | * We keep holding the journal lock until we have taken the bitmap | |
5563 | * lock, to make sure that no new journal transactions can start. | |
5564 | * This ensures that the journal trim list is not modified between | |
5565 | * the journal flush and taking the bitmap lock. | |
5566 | * We can release the journal lock once we hold the bitmap lock, | |
5567 | * since the bitmap lock prevents any further block deallocations. | |
5568 | */ | |
5569 | hfs_journal_lock(hfsmp); | |
5570 | ||
5571 | /* Flush the journal and wait for all I/Os to finish up */ | |
5572 | error = hfs_journal_flush(hfsmp, TRUE); | |
5573 | if (error) { | |
5574 | hfs_journal_unlock(hfsmp); | |
5575 | return error; | |
5576 | } | |
5577 | ||
5578 | /* | |
5579 | * Take the bitmap lock to ensure it is not being modified. | |
5580 | * Since we are reading larger than normal blocks from the bitmap, which | |
5581 | * might confuse other parts of the bitmap code using normal blocks, we | |
5582 | * take an exclusive lock here. | |
5583 | */ | |
5584 | bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); | |
5585 | ||
5586 | #if !HFS_ALLOC_TEST | |
5587 | bitmap_ctx.lock_start = mach_absolute_time(); | |
5588 | #endif | |
5589 | ||
5590 | /* Release the journal lock */ | |
5591 | hfs_journal_unlock(hfsmp); | |
5592 | ||
5593 | /* | |
5594 | * The bitmap is read here in large blocks (up to 1MB), | |
5595 | * unlike the runtime, which reads the bitmap in | |
5596 | * 4K blocks. If the bitmap is read both ways | |
5597 | * at the same time, it can result in multiple buf_t's with | |
5598 | * different sizes and potentially cause data corruption. | |
5599 | * To avoid this, we invalidate all the existing buffers | |
5600 | * associated with the bitmap vnode. | |
5601 | */ | |
5602 | error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); | |
5603 | if (error) | |
5604 | goto out; | |
5605 | ||
5606 | /* | |
5607 | * Get the list of all free extent ranges. For every run of free | |
5608 | * blocks found during the scan, the supplied callback (e.g. | |
5609 | * hfs_fsinfo_data_add() for HFS_GET_FSINFO) is invoked to account for it. | |
5610 | */ | |
5611 | bitmap_ctx.hfsmp = hfsmp; | |
5612 | bitmap_ctx.run_offset = 0; | |
5613 | ||
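	/*
	 * Walk the whole bitmap as alternating runs: measure a run of clear
	 * bits (a free extent) and report it to the caller, then measure the
	 * following run of set bits; after each run, mark the corresponding
	 * summary table bits as free (false) or full (true).
	 */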
5614 | while (bitmap_ctx.run_offset < hfsmp->totalBlocks) { | |
5615 | ||
5616 | uint32_t start = hfs_bit_offset(&bitmap_ctx); | |
5617 | ||
5618 | if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0) | |
5619 | goto out; | |
5620 | ||
5621 | if (count) | |
5622 | callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize)); | |
5623 | ||
5624 | if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0) | |
5625 | goto out; | |
5626 | ||
5627 | start = hfs_bit_offset(&bitmap_ctx); | |
5628 | ||
5629 | if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0) | |
5630 | goto out; | |
5631 | ||
5632 | if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0) | |
5633 | goto out; | |
5634 | } | |
5635 | ||
5636 | out: | |
5637 | if (bitmap_ctx.lockflags) { | |
5638 | hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags); | |
5639 | } | |
5640 | ||
5641 | return error; | |
5642 | } | |
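/*
 * Hedged usage sketch for hfs_find_free_extents() (not compiled; every
 * 'example_' name below is made up for illustration).  A caller such as the
 * HFS_GET_FSINFO path supplies a callback that receives each free extent's
 * size in bytes and can aggregate whatever statistics it needs, without any
 * knowledge of the bitmap itself.
 */
#if 0
struct example_free_extent_stats {
	uint64_t	total_free_bytes;
	uint64_t	largest_free_bytes;
	uint32_t	extent_count;
};

/* Invoked once per free extent with the extent's size in bytes. */
static void example_free_extent_cb(void *data, off_t free_extent_size)
{
	struct example_free_extent_stats *stats = data;

	stats->total_free_bytes += free_extent_size;
	if ((uint64_t)free_extent_size > stats->largest_free_bytes)
		stats->largest_free_bytes = free_extent_size;
	stats->extent_count++;
}

static errno_t example_collect_free_extent_stats(struct hfsmount *hfsmp,
		struct example_free_extent_stats *stats)
{
	bzero(stats, sizeof(*stats));
	return hfs_find_free_extents(hfsmp, example_free_extent_cb, stats);
}
#endif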
5643 |