1c79356b 1/*
6d2010ae 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 File: VolumeAllocation.c
30
31 Contains: Routines for accessing and modifying the volume bitmap.
32
33 Version: HFS Plus 1.0
34
6d2010ae 35 Copyright: © 1996-2009 by Apple Computer, Inc., all rights reserved.
0b4e3aa0 36
1c79356b
A
37*/
38
39/*
40Public routines:
41 BlockAllocate
42 Allocate space on a volume. Can allocate space contiguously.
43 If not contiguous, then allocation may be less than what was
44 asked for. Returns the starting block number, and number of
 45 blocks. (Note: only a single extent is allocated per call.)
46 BlockDeallocate
47 Deallocate a contiguous run of allocation blocks.
6d2010ae
A
48
49 BlockMarkAllocated
50 Exported wrapper to mark blocks as in-use. This will correctly determine
51 whether or not the red-black tree is enabled and call the appropriate function
52 if applicable.
1c79356b 53 BlockMarkFree
6d2010ae
A
54 Exported wrapper to mark blocks as freed. This will correctly determine whether or
55 not the red-black tree is enabled and call the appropriate function if applicable.
56
57
58 ResetVCBFreeExtCache
59 Since the red-black tree obviates the need to maintain the free extent cache, we do
 60 not update it while the tree is live. As a result, if we ever need to destroy the trees
61 we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
62 bitmap scanning allocator.
63 We also reset and disable the free extent cache when volume resizing is
64 in flight.
65
66 UpdateAllocLimit
67 Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent
68 allocations from occupying space in the region we are modifying during a filesystem resize.
69 At other times, it should be consistent with the total number of allocation blocks in the
70 filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should
71 know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
72 number of items in the tree that we can allocate from.
73
74Internal routines:
 75 Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE. This
 76 cuts down on code size by omitting functions that cannot be used when the
 77 red-black tree code is not compiled in.
78
79 BlockMarkFreeRBTree
 80 Make an internal call to BlockMarkFreeInternal and then update
81 and/or create Red-Black Tree allocation tree nodes to correspond
82 to the free space being generated.
83 BlockMarkFreeInternal
1c79356b 84 Mark a contiguous range of blocks as free. The corresponding
6d2010ae
A
85 bits in the volume bitmap will be cleared. This will actually do the work
86 of modifying the bitmap for us.
87
88 BlockMarkAllocatedRBTree
 89 Make an internal call to BlockMarkAllocatedInternal, which will update the
90 bitmap on-disk when we allocate blocks. If that is successful, then
91 we'll remove the appropriate entries from the red-black tree.
92 BlockMarkAllocatedInternal
1c79356b
A
 93 Mark a contiguous range of blocks as allocated. The
 94 corresponding bits in the volume bitmap are set. Also tests to see
6d2010ae
A
95 if any of the blocks were previously unallocated.
96 BlockFindContiguous
1c79356b
A
97 Find a contiguous range of blocks of a given size. The caller
98 specifies where to begin the search (by block number). The
6d2010ae
A
99 block number of the first block in the range is returned. This is only
100 called by the bitmap scanning logic as the red-black tree should be able
101 to do this internally by searching its tree.
0b4e3aa0
A
102 BlockAllocateAny
103 Find and allocate a contiguous range of blocks up to a given size. The
 104 first range of contiguous free blocks found is allocated, even if there
105 are fewer blocks than requested (and even if a contiguous range of blocks
106 of the given size exists elsewhere).
6d2010ae
A
107 BlockAllocateAnyBitmap
108 Finds a range of blocks per the above requirements without using the
109 Allocation RB Tree. This relies on the bitmap-scanning logic in order to find
110 any valid range of free space needed.
111 BlockAllocateAnyRBTree
112 Finds a valid range of blocks per the above requirements by searching
113 the red-black tree. We can just make an internal call to
114 BlockAllocateContigRBTree to find the valid range.
1c79356b
A
115 BlockAllocateContig
116 Find and allocate a contiguous range of blocks of a given size. If
117 a contiguous range of free blocks of the given size isn't found, then
6d2010ae
A
118 the allocation fails (i.e. it is "all or nothing"). This routine is
119 essentially a wrapper function around its related sub-functions,
120 BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
121 respectively, the original HFS+ bitmap scanning logic and the new
122 Red-Black Tree to search and manage free-space decisions. This function
123 contains logic for when to use which of the allocation algorithms,
124 depending on the free space contained in the volume.
125 BlockAllocateContigBitmap
126 Finds and allocates a range of blocks specified by the size parameters
127 using the original HFS+ bitmap scanning logic. The red-black tree
128 will not be updated if this function is used.
129 BlockAllocateContigRBTree
130 Finds and allocates a range of blocks specified by the size parameters
131 using the new red/black tree data structure and search algorithms
132 provided by the tree library. Updates the red/black tree nodes after
133 the on-disk data structure (bitmap) has been updated.
0b4e3aa0
A
134 BlockAllocateKnown
135 Try to allocate space from known free space in the volume's
136 free extent cache.
137
1c79356b
A
138 ReadBitmapBlock
139 Given an allocation block number, read the bitmap block that
140 contains that allocation block into a caller-supplied buffer.
0b4e3aa0
A
141
142 ReleaseBitmapBlock
143 Release a bitmap block back into the buffer cache.
6d2010ae
A
144
145
146Debug/Test Routines
147 hfs_isallocated
148 Test to see if any blocks in a range are allocated. Journal or
149 allocation file lock must be held.
150
151 hfs_isallocated_scan
152 Test to see if any blocks in a range are allocated. Releases and
153 invalidates the block used when finished.
154
155 hfs_isrbtree_active
156 Test to see if the allocation red-black tree is live. This function
157 requires either an exclusive or shared lock on the allocation bitmap file
158 in the HFS mount structure, to prevent red-black tree pointers from disappearing.
159
160 hfs_isrbtree_allocated
161 Test to see if the specified extent is marked as allocated in the red-black tree.
162 Multiplexes between the metadata zone trees and the normal allocation zone trees
163 depending on the offset of the extent specified.
164
165 check_rbtree_extents
166 Void function that wraps around the above function (hfs_isrbtree_allocated)
167 and checks to see that the return value was appropriate based on the assertion we're
168 trying to validate (whether or not the specified extent should be marked as free
169 or allocated).
170
171 hfs_validate_rbtree
172 Exhaustive search function that will check every allocation block for its status in the
173 red-black tree and then check the corresponding status in the bitmap file. If the two are out
174 of sync, it will panic. Note that this function is extremely expensive and must NEVER
175 be run outside of debug code.
176
177 hfs_checktreelinks
178 Checks the embedded linked list structure of the red black tree for integrity. The next pointer
179 should always point to whatever extent_tree_offset_next returns.
180
181
182Red Black Tree Specific Routines
183 GenerateTree
184 Build a red-black tree for the given filesystem's bitmap.
185
186 DestroyTrees
187 Destroy the tree on the given filesystem
188
189
190 hfs_alloc_scan_block
191 Given a starting allocation block number, figures out which physical block contains that
192 allocation block's bit, and scans it from the starting bit until either the ending bit or
193 the end of the block. Free space extents are inserted into the appropriate red-black tree.
194
1c79356b
A
195*/
196
197#include "../../hfs_macos_defs.h"
198
199#include <sys/types.h>
200#include <sys/buf.h>
201#include <sys/systm.h>
060df5ea 202#include <sys/sysctl.h>
593a1d5f 203#include <sys/disk.h>
6d2010ae
A
204#include <sys/ubc.h>
205#include <sys/uio.h>
060df5ea 206#include <kern/kalloc.h>
1c79356b
A
207
208#include "../../hfs.h"
209#include "../../hfs_dbg.h"
210#include "../../hfs_format.h"
211#include "../../hfs_endian.h"
1c79356b 213#include "../headers/FileMgrInternal.h"
6d2010ae
A
214#include "../headers/HybridAllocator.h"
215#include "../../hfs_kdebug.h"
1c79356b 216
060df5ea
A
217#ifndef CONFIG_HFS_TRIM
218#define CONFIG_HFS_TRIM 0
219#endif
220
6d2010ae
A
221/*
222 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
223 * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
224 * disabled by default because there can be a lot of these events,
225 * and we don't want to overwhelm the kernel debug buffer. If you
226 * want to watch these events in particular, just set the sysctl.)
227 */
228static int hfs_kdebug_allocation = 0;
229SYSCTL_DECL(_vfs_generic);
230SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
231SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
232SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
233enum {
234 /*
235 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
236 * BlockDeallocate, including the internal BlockAllocateXxx
237 * routines so we can see how an allocation was satisfied.
238 *
239 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
240 * free extent cache.
241 *
242 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
243 *
244 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
245 * or clearing bits, scanning the bitmap).
246 */
247 HFSDBG_ALLOC_ENABLED = 1,
248 HFSDBG_EXT_CACHE_ENABLED = 2,
249 HFSDBG_UNMAP_ENABLED = 4,
250 HFSDBG_BITMAP_ENABLED = 8
251};
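/*
 * Example (illustrative, not from the original source): the values above
 * form a bitmask, so several event classes can be enabled at once. To log
 * both allocation calls and bitmap accesses (1 | 8 = 9), set:
 *
 *     sysctl -w vfs.generic.hfs.kdebug.allocation=9
 *
 * Setting the value back to 0 disables all of these events again.
 */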
1c79356b
A
252
253enum {
0b4e3aa0 254 kBytesPerWord = 4,
1c79356b
A
255 kBitsPerByte = 8,
256 kBitsPerWord = 32,
0b4e3aa0
A
257
258 kBitsWithinWordMask = kBitsPerWord-1
1c79356b
A
259};
260
261#define kLowBitInWordMask 0x00000001ul
262#define kHighBitInWordMask 0x80000000ul
263#define kAllBitsSetInWord 0xFFFFFFFFul
264
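/*
 * Illustrative sketch (not part of the original source): how the constants
 * above map an allocation block number onto the bitmap. Bits are assigned
 * most-significant-first within each 32-bit word, so the first block covered
 * by a word corresponds to the high bit.
 */
#if 0
static u_int32_t bit_mask_for_block(u_int32_t block, u_int32_t *wordIndex)
{
	*wordIndex = block / kBitsPerWord; /* which 32-bit word in the bitmap */
	return kHighBitInWordMask >> (block & kBitsWithinWordMask); /* which bit */
}
#endif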
265
6d2010ae
A
266#define ALLOC_DEBUG 0
267
1c79356b
A
268static OSErr ReadBitmapBlock(
269 ExtendedVCB *vcb,
2d21ac55
A
270 u_int32_t bit,
271 u_int32_t **buffer,
b0d623f7 272 uintptr_t *blockRef);
0b4e3aa0
A
273
274static OSErr ReleaseBitmapBlock(
275 ExtendedVCB *vcb,
b0d623f7 276 uintptr_t blockRef,
0b4e3aa0
A
277 Boolean dirty);
278
279static OSErr BlockAllocateAny(
280 ExtendedVCB *vcb,
2d21ac55
A
281 u_int32_t startingBlock,
282 u_int32_t endingBlock,
283 u_int32_t maxBlocks,
55e303ae 284 Boolean useMetaZone,
2d21ac55
A
285 u_int32_t *actualStartBlock,
286 u_int32_t *actualNumBlocks);
1c79356b 287
6d2010ae
A
288static OSErr BlockAllocateAnyBitmap(
289 ExtendedVCB *vcb,
290 u_int32_t startingBlock,
291 u_int32_t endingBlock,
292 u_int32_t maxBlocks,
293 Boolean useMetaZone,
294 u_int32_t *actualStartBlock,
295 u_int32_t *actualNumBlocks);
296
1c79356b
A
297static OSErr BlockAllocateContig(
298 ExtendedVCB *vcb,
2d21ac55
A
299 u_int32_t startingBlock,
300 u_int32_t minBlocks,
301 u_int32_t maxBlocks,
55e303ae 302 Boolean useMetaZone,
2d21ac55
A
303 u_int32_t *actualStartBlock,
304 u_int32_t *actualNumBlocks);
1c79356b 305
6d2010ae
A
306static OSErr BlockAllocateContigBitmap(
307 ExtendedVCB *vcb,
308 u_int32_t startingBlock,
309 u_int32_t minBlocks,
310 u_int32_t maxBlocks,
311 Boolean useMetaZone,
312 u_int32_t *actualStartBlock,
313 u_int32_t *actualNumBlocks);
314
1c79356b
A
315static OSErr BlockFindContiguous(
316 ExtendedVCB *vcb,
2d21ac55
A
317 u_int32_t startingBlock,
318 u_int32_t endingBlock,
319 u_int32_t minBlocks,
320 u_int32_t maxBlocks,
55e303ae 321 Boolean useMetaZone,
2d21ac55
A
322 u_int32_t *actualStartBlock,
323 u_int32_t *actualNumBlocks);
1c79356b 324
0b4e3aa0
A
325static OSErr BlockAllocateKnown(
326 ExtendedVCB *vcb,
2d21ac55
A
327 u_int32_t maxBlocks,
328 u_int32_t *actualStartBlock,
329 u_int32_t *actualNumBlocks);
0b4e3aa0 330
6d2010ae
A
331static OSErr BlockMarkAllocatedInternal (
332 ExtendedVCB *vcb,
333 u_int32_t startingBlock,
334 register u_int32_t numBlocks);
335
336static OSErr BlockMarkFreeInternal(
337 ExtendedVCB *vcb,
338 u_int32_t startingBlock,
339 u_int32_t numBlocks,
340 Boolean do_validate);
341
342#if CONFIG_HFS_ALLOC_RBTREE
343
344static OSErr ReleaseRBScanBitmapBlock( struct buf *bp );
345
346static OSErr BlockAllocateAnyRBTree(
347 ExtendedVCB *vcb,
348 u_int32_t startingBlock,
349 u_int32_t maxBlocks,
350 Boolean useMetaZone,
351 u_int32_t *actualStartBlock,
352 u_int32_t *actualNumBlocks);
353
354static OSErr BlockAllocateContigRBTree(
355 ExtendedVCB *vcb,
356 u_int32_t startingBlock,
357 u_int32_t minBlocks,
358 u_int32_t maxBlocks,
359 Boolean useMetaZone,
360 u_int32_t *actualStartBlock,
361 u_int32_t *actualNumBlocks,
362 u_int32_t forceContig);
363
364static OSErr BlockMarkAllocatedRBTree(
365 ExtendedVCB *vcb,
366 u_int32_t startingBlock,
367 u_int32_t numBlocks);
368
369static OSErr BlockMarkFreeRBTree(
370 ExtendedVCB *vcb,
371 u_int32_t startingBlock,
372 u_int32_t numBlocks);
373
374static int
375hfs_isrbtree_allocated (struct hfsmount * hfsmp,
376 u_int32_t startBlock,
377 u_int32_t numBlocks,
378 extent_node_t** node1);
379
380extern void
381hfs_validate_rbtree (struct hfsmount *hfsmp,
382 u_int32_t start,
383 u_int32_t end);
384
385static void hfs_checktreelinks (struct hfsmount *hfsmp);
386
387
388void check_rbtree_extents (struct hfsmount *hfsmp,
389 u_int32_t start,
390 u_int32_t numBlocks,
391 int shouldBeFree);
392
393int hfs_isallocated_scan (struct hfsmount *hfsmp,
394 u_int32_t startingBlock,
395 u_int32_t *bp_buf);
396
397static int hfs_alloc_scan_block(struct hfsmount *hfsmp,
398 u_int32_t startbit,
399 u_int32_t endBit,
400 u_int32_t *bitToScan);
401
402#define ASSERT_FREE 1
403#define ASSERT_ALLOC 0
404
405#endif /* CONFIG_HFS_ALLOC_RBTREE */
406
407/* Functions for manipulating free extent cache */
408static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
409static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
410static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
411
412#if ALLOC_DEBUG
413/*
414 * Extra #includes for the debug function below. These are not normally #included because
415 * they would constitute a layering violation
416 */
417#include <vfs/vfs_journal.h>
418#include <sys/disk.h>
419
420/*
421 * Validation Routine to verify that the TRIM list maintained by the journal
422 * is in good shape relative to what we think the bitmap should have. We should
423 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
424 * we panic.
425 */
426int trim_validate_bitmap (struct hfsmount *hfsmp) {
427 u_int64_t blockno_offset;
428 u_int64_t numblocks;
429 int i;
430 int count;
431 u_int32_t startblk;
432 u_int32_t blks;
433 int err = 0;
434 uint32_t alloccount = 0;
435
436 if (hfsmp->jnl) {
437 struct journal *jnl = (struct journal*)hfsmp->jnl;
438 if (jnl->active_tr) {
439 struct jnl_trim_list *trim = &(jnl->active_tr->trim);
440 count = trim->extent_count;
441 for (i = 0; i < count; i++) {
442 blockno_offset = trim->extents[i].offset;
443 blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
444 blockno_offset = blockno_offset / hfsmp->blockSize;
445 numblocks = trim->extents[i].length / hfsmp->blockSize;
446
447 startblk = (u_int32_t)blockno_offset;
448 blks = (u_int32_t) numblocks;
449 err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
450
451 if (err == 0 && alloccount != 0) {
452 panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
453 }
454 }
455 }
456 }
457 return 0;
458}
1c79356b 459
6d2010ae 460#endif
060df5ea
A
461
462/*
463;________________________________________________________________________________
464;
465; Routine: hfs_unmap_free_extent
466;
467; Function: Make note of a range of allocation blocks that should be
468; unmapped (trimmed). That is, the given range of blocks no
469; longer have useful content, and the device can unmap the
470; previous contents. For example, a solid state disk may reuse
471; the underlying storage for other blocks.
472;
473; This routine is only supported for journaled volumes. The extent
474; being freed is passed to the journal code, and the extent will
475; be unmapped after the current transaction is written to disk.
476;
477; Input Arguments:
478; hfsmp - The volume containing the allocation blocks.
479; startingBlock - The first allocation block of the extent being freed.
480; numBlocks - The number of allocation blocks of the extent being freed.
481;________________________________________________________________________________
482*/
483static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
484{
6d2010ae
A
485 u_int64_t offset;
486 u_int64_t length;
 487 int err = 0;
488
489 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
490 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
491
492 if (hfsmp->jnl != NULL) {
493 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
494 length = (u_int64_t) numBlocks * hfsmp->blockSize;
495
496 err = journal_trim_add_extent(hfsmp->jnl, offset, length);
497 if (err) {
498 printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
060df5ea
A
499 }
500 }
6d2010ae
A
501
502 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
503 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
060df5ea
A
504}
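/*
 * Worked example (illustrative): with a 4 KiB allocation block size and
 * hfsPlusIOPosOffset == 0, freeing 16 blocks starting at allocation block
 * 1000 passes offset = 1000 * 4096 = 4,096,000 bytes and
 * length = 16 * 4096 = 65,536 bytes to journal_trim_add_extent.
 */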
505
506
507/*
508;________________________________________________________________________________
509;
510; Routine: hfs_unmap_alloc_extent
511;
 512 ; Function: Make note that a range of allocation blocks, some of
513; which may have previously been passed to hfs_unmap_free_extent,
514; is now in use on the volume. The given blocks will be removed
515; from any pending DKIOCUNMAP.
516;
517; Input Arguments:
518; hfsmp - The volume containing the allocation blocks.
519; startingBlock - The first allocation block of the extent being allocated.
520; numBlocks - The number of allocation blocks being allocated.
521;________________________________________________________________________________
522*/
523static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
524{
6d2010ae
A
525 u_int64_t offset;
526 u_int64_t length;
 527 int err = 0;
528
529 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
530 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
531
532 if (hfsmp->jnl != NULL) {
533 offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
534 length = (u_int64_t) numBlocks * hfsmp->blockSize;
060df5ea 535
6d2010ae
A
536 err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
537 if (err) {
538 printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
060df5ea
A
539 }
540 }
6d2010ae
A
541
542 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
543 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
060df5ea
A
544}
545
546
1c79356b
A
547/*
548;________________________________________________________________________________
549;
6d2010ae 550; Routine: hfs_trim_callback
1c79356b 551;
6d2010ae
A
552; Function: This function is called when a transaction that freed extents
553; (via hfs_unmap_free_extent/journal_trim_add_extent) has been
554; written to the on-disk journal. This routine will add those
555; extents to the free extent cache so that they can be reused.
1c79356b 556;
6d2010ae
A
557; CAUTION: This routine is called while the journal's trim lock
558; is held shared, so that no other thread can reuse any portion
559; of those extents. We must be very careful about which locks
560; we take from within this callback, to avoid deadlock. The
561; call to add_free_extent_cache will end up taking the cache's
562; lock (just long enough to add these extents to the cache).
1c79356b 563;
6d2010ae
A
 564 ; CAUTION: If the journal becomes invalid (e.g., due to an I/O
565; error when trying to write to the journal), this callback
566; will stop getting called, even if extents got freed before
567; the journal became invalid!
1c79356b 568;
6d2010ae
A
569; Input Arguments:
570; arg - The hfsmount of the volume containing the extents.
571; extent_count - The number of extents freed in the transaction.
572; extents - An array of extents (byte ranges) that were freed.
1c79356b
A
573;________________________________________________________________________________
574*/
6d2010ae
A
575__private_extern__ void
576hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
b0d623f7 577{
6d2010ae
A
578 uint32_t i;
579 uint32_t startBlock, numBlocks;
580 struct hfsmount *hfsmp = arg;
581
582 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
583 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
584
585 for (i=0; i<extent_count; ++i) {
586 /* Convert the byte range in *extents back to a range of allocation blocks. */
587 startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
588 numBlocks = extents[i].length / hfsmp->blockSize;
589 (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
b0d623f7 590 }
6d2010ae
A
591
592 if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
593 KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
b0d623f7
A
594}
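/*
 * Worked example (illustrative): continuing the hfs_unmap_free_extent
 * example above, a trimmed extent of {offset = 4,096,000 bytes,
 * length = 65,536 bytes} on a volume with 4 KiB blocks and
 * hfsPlusIOPosOffset == 0 converts back to startBlock = 1000 and
 * numBlocks = 16 before being added to the free extent cache.
 */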
595
1c79356b 596
6d2010ae
A
597/*
598 ;________________________________________________________________________________
599 ;
600 ; Routine: BlockAllocate
601 ;
602 ; Function: Allocate space on a volume. If contiguous allocation is requested,
603 ; at least the requested number of bytes will be allocated or an
604 ; error will be returned. If contiguous allocation is not forced,
605 ; the space will be allocated with the first largest extent available
606 ; at the requested starting allocation block. If there is not enough
 607 ; room there, an allocation of less than the requested size will be
608 ; allocated.
609 ;
610 ; If the requested starting block is 0 (for new file allocations),
611 ; the volume's allocation block pointer will be used as a starting
612 ; point.
613 ;
614 ; Input Arguments:
615 ; vcb - Pointer to ExtendedVCB for the volume to allocate space on
617 ; startingBlock - Preferred starting allocation block, 0 = no preference
618 ; minBlocks - Number of blocks requested. If the allocation is non-contiguous,
619 ; less than this may actually be allocated
620 ; maxBlocks - The maximum number of blocks to allocate. If there is additional free
 621 ; space after minBlocks, then up to maxBlocks blocks should really
622 ; be allocated. (Used by ExtendFileC to round up allocations to a multiple
623 ; of the file's clump size.)
624 ; flags - Flags to specify options like contiguous, use metadata zone,
625 ; skip free block check, etc.
626 ;
627 ; Output:
628 ; (result) - Error code, zero for successful allocation
 629 ; *actualStartBlock - Actual starting allocation block
 630 ; *actualNumBlocks - Actual number of allocation blocks allocated
631 ;
632 ; Side effects:
633 ; The volume bitmap is read and updated; the volume bitmap cache may be changed.
634 ;________________________________________________________________________________
635 */
1c79356b
A
636OSErr BlockAllocate (
637 ExtendedVCB *vcb, /* which volume to allocate space on */
2d21ac55
A
638 u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */
639 u_int32_t minBlocks, /* desired number of blocks to allocate */
640 u_int32_t maxBlocks, /* maximum number of blocks to allocate */
0b4c1975 641 u_int32_t flags, /* option flags */
2d21ac55
A
642 u_int32_t *actualStartBlock, /* actual first block of allocation */
643 u_int32_t *actualNumBlocks) /* number of blocks actually allocated; if forceContiguous */
55e303ae 644 /* was zero, then this may represent fewer than minBlocks */
1c79356b 645{
2d21ac55 646 u_int32_t freeBlocks;
1c79356b 647 OSErr err;
1c79356b 648 Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated
6d2010ae 649 struct hfsmount *hfsmp;
0b4c1975
A
650 Boolean useMetaZone;
651 Boolean forceContiguous;
652
6d2010ae
A
653 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
654 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
655
0b4c1975
A
656 if (flags & HFS_ALLOC_FORCECONTIG) {
657 forceContiguous = true;
658 } else {
659 forceContiguous = false;
660 }
661
662 if (flags & HFS_ALLOC_METAZONE) {
663 useMetaZone = true;
664 } else {
665 useMetaZone = false;
666 }
1c79356b 667
6d2010ae
A
668 //TODO: Figure out when we need to re-enable the RB-Tree.
669
670
671 //TODO: Make sure we use allocLimit when appropriate.
672
673 /*
674 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
675 * in conjunction with the Generate Tree function. If the red-black tree does not currently contain
676 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
677 * we find what we need. We'll update the tree fields when we're done, indicating that we've advanced the
678 * high water mark for the tree.
679 */
680
1c79356b
A
681 //
682 // Initialize outputs in case we get an error
683 //
684 *actualStartBlock = 0;
685 *actualNumBlocks = 0;
6d2010ae
A
686 hfsmp = VCBTOHFS (vcb);
687 freeBlocks = hfs_freeblks(hfsmp, 0);
688
1c79356b 689
0b4c1975
A
690 /* Skip free block check if blocks are being allocated for relocating
 691 * data while truncating a volume.
692 *
693 * During hfs_truncatefs(), the volume free block count is updated
694 * before relocating data to reflect the total number of free blocks
695 * that will exist on the volume after resize is successful. This
696 * means that we have reserved allocation blocks required for relocating
697 * the data and hence there is no need to check the free blocks.
698 * It will also prevent resize failure when the number of blocks in
699 * an extent being relocated is more than the free blocks that will
700 * exist after the volume is resized.
55e303ae 701 */
0b4c1975
A
702 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
703 // If the disk is already full, don't bother.
704 if (freeBlocks == 0) {
705 err = dskFulErr;
706 goto Exit;
707 }
708 if (forceContiguous && freeBlocks < minBlocks) {
709 err = dskFulErr;
710 goto Exit;
711 }
712
713 /*
714 * Clip if necessary so we don't over-subscribe the free blocks.
715 */
716 if (minBlocks > freeBlocks) {
717 minBlocks = freeBlocks;
718 }
719 if (maxBlocks > freeBlocks) {
720 maxBlocks = freeBlocks;
721 }
55e303ae
A
722 }
723
1c79356b
A
724 //
725 // If caller didn't specify a starting block number, then use the volume's
726 // next block to allocate from.
727 //
728 if (startingBlock == 0) {
91447636 729 HFS_MOUNT_LOCK(vcb, TRUE);
6d2010ae
A
730
731 /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
b0d623f7
A
732 if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
733 startingBlock = vcb->sparseAllocation;
6d2010ae
A
734 }
735 else {
b0d623f7
A
736 startingBlock = vcb->nextAllocation;
737 }
91447636 738 HFS_MOUNT_UNLOCK(vcb, TRUE);
1c79356b
A
739 updateAllocPtr = true;
740 }
6d2010ae
A
741
742
2d21ac55 743 if (startingBlock >= vcb->allocLimit) {
55e303ae
A
744 startingBlock = 0; /* overflow so start at beginning */
745 }
0b4e3aa0 746
1c79356b
A
747 //
748 // If the request must be contiguous, then find a sequence of free blocks
749 // that is long enough. Otherwise, find the first free block.
750 //
751 if (forceContiguous) {
55e303ae
A
752 err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
753 useMetaZone, actualStartBlock, actualNumBlocks);
9bccf70c 754 /*
6d2010ae
A
755 * If we allocated from a new position then also update the roving allocator.
756 * This will keep the roving allocation pointer up-to-date even
757 * if we are using the new R/B tree allocator, since
758 * it doesn't matter to us here, how the underlying allocator found
759 * the block to vend out.
9bccf70c 760 */
55e303ae
A
761 if ((err == noErr) &&
762 (*actualStartBlock > startingBlock) &&
763 ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
764 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
b0d623f7 765 updateAllocPtr = true;
55e303ae 766 }
1c79356b 767 } else {
6d2010ae
A
768#if CONFIG_HFS_ALLOC_RBTREE
769 /*
770 * If the RB-Tree Allocator is live, just go straight for a
771 * BlockAllocateAny call and return the result. Otherwise,
772 * resort to the bitmap scanner.
773 */
774 if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
775 /* Start by trying to allocate from the starting block forward */
776 err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
777 maxBlocks, useMetaZone, actualStartBlock,
778 actualNumBlocks);
779
780 /*
781 * Because the RB-Tree is live, the previous call to BlockAllocateAny
782 * will use the rbtree variant. As a result, it will automatically search the
783 * metadata zone for a valid extent if needed. If we get a return value of
784 * noErr, we found a valid extent and we can skip to the end. If the error indicates
785 * the disk is full, that's an equally valid return code and we can skip to the end, too.
786 */
787 if (err == noErr || err == dskFulErr) {
788 goto Exit;
789 }
790 else {
791 //TODO: only tear down tree if the tree is finished building.
792 //Make sure to handle the ENOSPC condition properly. We shouldn't error out in that case.
793 /* Tear down tree if we encounter an error */
794 if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
795 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
796 DestroyTrees(hfsmp);
797 ResetVCBFreeExtCache(hfsmp);
798 }
799 else {
800 goto Exit;
801 }
802 // fall through to the normal allocation since the rb-tree allocation failed.
803 }
804 }
805#endif
806
0b4e3aa0
A
807 /*
808 * Scan the bitmap once, gather the N largest free extents, then
809 * allocate from these largest extents. Repeat as needed until
810 * we get all the space we needed. We could probably build up
811 * that list when the higher level caller tried (and failed) a
812 * contiguous allocation first.
6d2010ae
A
813 *
 814 * Note that the free-extent cache will cease to be updated if
815 * we are using the red-black tree for allocations. If we jettison
816 * the tree, then we will reset the free-extent cache and start over.
0b4e3aa0 817 */
6d2010ae 818
0b4e3aa0 819 err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
6d2010ae
A
820 /* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */
821
822 if (err == dskFulErr) {
823 /*
824 * Now we have to do a bigger scan. Start at startingBlock and go up until the
825 * allocation limit.
826 */
2d21ac55 827 err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
55e303ae
A
828 maxBlocks, useMetaZone, actualStartBlock,
829 actualNumBlocks);
6d2010ae
A
830 }
831 if (err == dskFulErr) {
832 /*
 833 * We may be out of space in the normal zone; search from the start of
 834 * the volume up to the original starting block.
835 */
55e303ae
A
836 err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
837 useMetaZone, actualStartBlock,
838 actualNumBlocks);
6d2010ae 839 }
1c79356b
A
840 }
841
91447636
A
842Exit:
843 // if we actually allocated something then go update the
844 // various bits of state that we maintain regardless of
845 // whether there was an error (i.e. partial allocations
846 // still need to update things like the free block count).
847 //
848 if (*actualNumBlocks != 0) {
1c79356b
A
849 //
850 // If we used the volume's roving allocation pointer, then we need to update it.
851 // Adding in the length of the current allocation might reduce the next allocate
852 // call by avoiding a re-scan of the already allocated space. However, the clump
853 // just allocated can quite conceivably end up being truncated or released when
854 // the file is closed or its EOF changed. Leaving the allocation pointer at the
855 // start of the last allocation will avoid unnecessary fragmentation in this case.
856 //
91447636 857 HFS_MOUNT_LOCK(vcb, TRUE);
1c79356b 858
6d2010ae 859 lck_spin_lock(&hfsmp->vcbFreeExtLock);
b0d623f7
A
860 if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
861 vcb->sparseAllocation = *actualStartBlock;
862 }
6d2010ae 863 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
b0d623f7
A
864 if (*actualNumBlocks < vcb->hfs_freed_block_count) {
865 vcb->hfs_freed_block_count -= *actualNumBlocks;
866 } else {
867 vcb->hfs_freed_block_count = 0;
868 }
6d2010ae 869
55e303ae 870 if (updateAllocPtr &&
6d2010ae
A
871 ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
872 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
2d21ac55 873 HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
55e303ae 874 }
b0d623f7 875
6d2010ae 876 (void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);
0b4c1975
A
877
878 /*
879 * Update the number of free blocks on the volume
880 *
 881 * Skip updating the free blocks count if the blocks are
882 * being allocated to relocate data as part of hfs_truncatefs()
883 */
884 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
885 vcb->freeBlocks -= *actualNumBlocks;
886 }
1c79356b 887 MarkVCBDirty(vcb);
91447636
A
888 HFS_MOUNT_UNLOCK(vcb, TRUE);
889
890 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1c79356b
A
891 }
892
6d2010ae
A
893 if (ALLOC_DEBUG) {
894 if (err == noErr) {
895 if (*actualStartBlock >= hfsmp->totalBlocks) {
896 panic ("BlockAllocate: vending invalid blocks!");
897 }
898 if (*actualStartBlock >= hfsmp->allocLimit) {
899 panic ("BlockAllocate: vending block past allocLimit!");
900 }
901
 902 if ((*actualStartBlock + *actualNumBlocks) > hfsmp->totalBlocks) {
903 panic ("BlockAllocate: vending too many invalid blocks!");
904 }
905
 906 if ((*actualStartBlock + *actualNumBlocks) > hfsmp->allocLimit) {
907 panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
908 }
909 }
910 }
911
912 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
913 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
914
1c79356b
A
915 return err;
916}
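/*
 * Illustrative caller sketch (assumed, not from the original source): this
 * is roughly how a caller such as ExtendFileC might request a contiguous
 * extent. A real caller must hold the allocation file lock and be inside a
 * journal transaction before calling BlockAllocate.
 */
#if 0
static OSErr example_alloc_contig(ExtendedVCB *vcb, u_int32_t blocksNeeded)
{
	u_int32_t start, count;
	OSErr err;

	/* startingBlock == 0 means "start at the volume's roving allocation pointer" */
	err = BlockAllocate(vcb, 0, blocksNeeded, blocksNeeded,
	                    HFS_ALLOC_FORCECONTIG, &start, &count);
	if (err == noErr) {
		/* with HFS_ALLOC_FORCECONTIG, count >= blocksNeeded on success */
	}
	return err;
}
#endif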
917
918
1c79356b
A
919/*
920;________________________________________________________________________________
921;
6d2010ae 922; Routine: BlockDeallocate
1c79356b
A
923;
924; Function: Update the bitmap to deallocate a run of disk allocation blocks
925;
926; Input Arguments:
927; vcb - Pointer to ExtendedVCB for the volume to free space on
928; firstBlock - First allocation block to be freed
929; numBlocks - Number of allocation blocks to free up (must be > 0!)
930;
931; Output:
932; (result) - Result code
933;
934; Side effects:
935; The volume bitmap is read and updated; the volume bitmap cache may be changed.
6d2010ae 936; The Allocator's red-black trees may also be modified as a result.
1c79356b
A
937;________________________________________________________________________________
938*/
939
940OSErr BlockDeallocate (
941 ExtendedVCB *vcb, // Which volume to deallocate space on
2d21ac55 942 u_int32_t firstBlock, // First block in range to deallocate
0b4c1975
A
943 u_int32_t numBlocks, // Number of contiguous blocks to deallocate
944 u_int32_t flags)
1c79356b
A
945{
946 OSErr err;
6d2010ae
A
947 struct hfsmount *hfsmp;
948 hfsmp = VCBTOHFS(vcb);
1c79356b 949
6d2010ae
A
950 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
951 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
952
1c79356b
A
953 //
954 // If no blocks to deallocate, then exit early
955 //
956 if (numBlocks == 0) {
957 err = noErr;
958 goto Exit;
959 }
6d2010ae
A
960
961
962 if (ALLOC_DEBUG) {
963 if (firstBlock >= hfsmp->totalBlocks) {
964 panic ("BlockDeallocate: freeing invalid blocks!");
965 }
966
 967 if ((firstBlock + numBlocks) > hfsmp->totalBlocks) {
968 panic ("BlockDeallocate: freeing too many invalid blocks!");
969 }
970 }
971
972
973
974
975 /*
976 * If we're using the red-black tree code, then try to free the
977 * blocks by marking them in the red-black tree first. If the tree
978 * is not active for whatever reason (or we're not using the
979 * R/B Tree code at all), then go straight for the BlockMarkFree
980 * function.
981 *
982 * Remember that we can get into this function if the tree isn't finished
983 * building. In that case, check to see if the block we're de-allocating is
984 * past the high watermark
985 */
986#if CONFIG_HFS_ALLOC_RBTREE
987 if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
988 /*
989 * BlockMarkFreeRBTree deals with the case where we are resizing the
990 * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion
 991 * that is currently controlled by the r/b tree.
992 */
993
994 //TODO: Update multiplexing code for the half-finished case.
995 err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
996 adjustFreeExtCache = 0;
997 }
998 else {
999 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1000 }
1c79356b 1001
6d2010ae
A
1002#else
1003 err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
1004#endif
1c79356b
A
1005 if (err)
1006 goto Exit;
1007
1008 //
1009 // Update the volume's free block count, and mark the VCB as dirty.
1010 //
91447636 1011 HFS_MOUNT_LOCK(vcb, TRUE);
0b4c1975
A
1012
1013 /*
 1014 * Do not update the free block count. This flag is specified
1015 * when a volume is being truncated.
1016 */
1017 if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
1018 vcb->freeBlocks += numBlocks;
1019 }
1020
b0d623f7 1021 vcb->hfs_freed_block_count += numBlocks;
b0d623f7
A
1022
1023 if (vcb->nextAllocation == (firstBlock + numBlocks)) {
2d21ac55 1024 HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
b0d623f7
A
1025 }
1026
6d2010ae
A
1027 if (hfsmp->jnl == NULL) {
1028 /*
1029 * In the journal case, we'll add the free extent once the journal
1030 * calls us back to tell us it wrote the transaction to disk.
1031 */
1032 (void) add_free_extent_cache(vcb, firstBlock, numBlocks);
1033
1034 /*
 1035 * In the journal case, we'll only update sparseAllocation once the
1036 * free extent cache becomes empty (when we remove the last entry
1037 * from the cache). Skipping it here means we're less likely to
1038 * find a recently freed extent via the bitmap before it gets added
1039 * to the free extent cache.
1040 */
1041 if (firstBlock < vcb->sparseAllocation) {
1042 vcb->sparseAllocation = firstBlock;
b0d623f7
A
1043 }
1044 }
6d2010ae 1045
1c79356b 1046 MarkVCBDirty(vcb);
91447636
A
1047 HFS_MOUNT_UNLOCK(vcb, TRUE);
1048
1049 hfs_generate_volume_notifications(VCBTOHFS(vcb));
1c79356b
A
1050Exit:
1051
6d2010ae
A
1052 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1053 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
1054
1c79356b
A
1055 return err;
1056}
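/*
 * Illustrative flow note (assumed, summarizing the code above): on a
 * journaled volume the bitmap bits are cleared here, but the freed extent
 * is not added to the free extent cache. That happens in hfs_trim_callback,
 * once the transaction that freed the blocks has been written to the
 * on-disk journal; until then the space is only found by bitmap scanning.
 */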
1057
1058
2d21ac55 1059u_int8_t freebitcount[16] = {
55e303ae
A
1060 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */
1061 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */
1062};
1063
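/*
 * Worked example (illustrative): freebitcount maps a 4-bit nibble to its
 * count of zero (free) bits. For the bitmap byte 0xA5 (binary 1010 0101),
 * freebitcount[0x5] + freebitcount[0xA] = 2 + 2 = 4 free blocks, which is
 * exactly the per-byte computation MetaZoneFreeBlocks performs below.
 */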
2d21ac55 1064u_int32_t
55e303ae 1065MetaZoneFreeBlocks(ExtendedVCB *vcb)
1c79356b 1066{
2d21ac55
A
1067 u_int32_t freeblocks;
1068 u_int32_t *currCache;
b0d623f7 1069 uintptr_t blockRef;
2d21ac55
A
1070 u_int32_t bit;
1071 u_int32_t lastbit;
55e303ae
A
1072 int bytesleft;
1073 int bytesperblock;
2d21ac55
A
1074 u_int8_t byte;
1075 u_int8_t *buffer;
91447636 1076
55e303ae
A
1077 blockRef = 0;
1078 bytesleft = freeblocks = 0;
91447636 1079 buffer = NULL;
55e303ae
A
1080 bit = VCBTOHFS(vcb)->hfs_metazone_start;
1081 if (bit == 1)
1082 bit = 0;
1c79356b 1083
55e303ae
A
1084 lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
1085 bytesperblock = vcb->vcbVBMIOSize;
1086
1087 /*
1088 * Count all the bits from bit to lastbit.
1089 */
1090 while (bit < lastbit) {
1091 /*
1092 * Get next bitmap block.
1093 */
1094 if (bytesleft == 0) {
1095 if (blockRef) {
1096 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1097 blockRef = 0;
1098 }
1099 if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
1100 return (0);
1101 }
2d21ac55 1102 buffer = (u_int8_t *)currCache;
55e303ae
A
1103 bytesleft = bytesperblock;
1104 }
1105 byte = *buffer++;
1106 freeblocks += freebitcount[byte & 0x0F];
1107 freeblocks += freebitcount[(byte >> 4) & 0x0F];
1108 bit += kBitsPerByte;
1109 --bytesleft;
1110 }
1111 if (blockRef)
1112 (void) ReleaseBitmapBlock(vcb, blockRef, false);
1113
1114 return (freeblocks);
1c79356b
A
1115}
1116
1117
55e303ae
A
1118/*
1119 * Obtain the next allocation block (bit) that's
1120 * outside the metadata allocation zone.
1121 */
2d21ac55 1122static u_int32_t NextBitmapBlock(
55e303ae 1123 ExtendedVCB *vcb,
2d21ac55 1124 u_int32_t bit)
55e303ae
A
1125{
1126 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1127
1128 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
1129 return (bit);
1130 /*
1131 * Skip over metadata allocation zone.
1132 */
1133 if ((bit >= hfsmp->hfs_metazone_start) &&
1134 (bit <= hfsmp->hfs_metazone_end)) {
1135 bit = hfsmp->hfs_metazone_end + 1;
1136 }
1137 return (bit);
1138}
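/*
 * Worked example (illustrative): with hfs_metazone_start = 64 and
 * hfs_metazone_end = 4095, NextBitmapBlock(vcb, 1000) returns 4096 (the
 * first block past the metadata zone), while NextBitmapBlock(vcb, 5000)
 * returns 5000 unchanged. Volumes without HFS_METADATA_ZONE always get
 * the input bit back.
 */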
1139
1c79356b
A
1140
1141/*
1142;_______________________________________________________________________
1143;
1144; Routine: ReadBitmapBlock
1145;
1146; Function: Read in a bitmap block corresponding to a given allocation
0b4e3aa0 1147; block (bit). Return a pointer to the bitmap block.
1c79356b
A
1148;
1149; Inputs:
1150; vcb -- Pointer to ExtendedVCB
0b4e3aa0 1151; bit -- Allocation block whose bitmap block is desired
1c79356b
A
1152;
1153; Outputs:
 1154 ; buffer -- Pointer to bitmap block corresponding to "block"
0b4e3aa0 1155; blockRef
1c79356b
A
1156;_______________________________________________________________________
1157*/
1158static OSErr ReadBitmapBlock(
1159 ExtendedVCB *vcb,
2d21ac55
A
1160 u_int32_t bit,
1161 u_int32_t **buffer,
b0d623f7 1162 uintptr_t *blockRef)
1c79356b
A
1163{
1164 OSErr err;
0b4e3aa0
A
1165 struct buf *bp = NULL;
1166 struct vnode *vp = NULL;
91447636 1167 daddr64_t block;
2d21ac55 1168 u_int32_t blockSize;
1c79356b 1169
6d2010ae
A
1170 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1171 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
1172
0b4e3aa0 1173 /*
91447636 1174 * volume bitmap blocks are protected by the allocation file lock
0b4e3aa0 1175 */
91447636 1176 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1c79356b 1177
2d21ac55 1178 blockSize = (u_int32_t)vcb->vcbVBMIOSize;
91447636 1179 block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
1c79356b 1180
0b4e3aa0 1181 if (vcb->vcbSigWord == kHFSPlusSigWord) {
91447636 1182 vp = vcb->hfs_allocation_vp; /* use allocation file vnode */
1c79356b 1183
0b4e3aa0
A
1184 } else /* hfs */ {
1185 vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */
1186 block += vcb->vcbVBMSt; /* map to physical block */
1c79356b 1187 }
1c79356b 1188
91447636 1189 err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
1c79356b 1190
0b4e3aa0
A
1191 if (bp) {
1192 if (err) {
91447636 1193 buf_brelse(bp);
2d21ac55 1194 *blockRef = 0;
0b4e3aa0
A
1195 *buffer = NULL;
1196 } else {
b0d623f7 1197 *blockRef = (uintptr_t)bp;
2d21ac55 1198 *buffer = (u_int32_t *)buf_dataptr(bp);
0b4e3aa0 1199 }
1c79356b
A
1200 }
1201
6d2010ae
A
1202 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1203 KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
1204
1c79356b
A
1205 return err;
1206}
1207
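/*
 * Worked example (illustrative): with vcbVBMIOSize = 4096, each bitmap
 * block covers 4096 * 8 = 32,768 allocation blocks, so allocation block
 * 100,000 lives in bitmap block 100000 / 32768 = 3. On plain HFS volumes
 * that index is then offset by vcbVBMSt to reach the physical block.
 */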
1208
0b4e3aa0
A
1209/*
1210;_______________________________________________________________________
1211;
1212; Routine: ReleaseBitmapBlock
1213;
 1214 ; Function: Release a bitmap block.
1215;
1216; Inputs:
1217; vcb
1218; blockRef
1219; dirty
1220;_______________________________________________________________________
1221*/
1222static OSErr ReleaseBitmapBlock(
1223 ExtendedVCB *vcb,
b0d623f7 1224 uintptr_t blockRef,
0b4e3aa0
A
1225 Boolean dirty)
1226{
1227 struct buf *bp = (struct buf *)blockRef;
55e303ae 1228
6d2010ae
A
1229 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1230 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
1231
55e303ae
A
1232 if (blockRef == 0) {
1233 if (dirty)
b0d623f7 1234 panic("hfs: ReleaseBitmapBlock: missing bp");
55e303ae
A
1235 return (0);
1236 }
0b4e3aa0
A
1237
1238 if (bp) {
1239 if (dirty) {
b4c24cb9
A
1240 // XXXdbg
1241 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1242
1243 if (hfsmp->jnl) {
2d21ac55 1244 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
b4c24cb9 1245 } else {
91447636 1246 buf_bdwrite(bp);
b4c24cb9 1247 }
0b4e3aa0 1248 } else {
91447636 1249 buf_brelse(bp);
0b4e3aa0
A
1250 }
1251 }
1252
6d2010ae
A
1253 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
1254 KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
1255
1256 return (0);
1257}
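/*
 * Illustrative pairing sketch (assumed; fragment only, vcb and bit are
 * presumed in scope): every successful ReadBitmapBlock should be balanced
 * by a ReleaseBitmapBlock. Pass dirty = true only if the bitmap data was
 * modified; that routes the buffer through the journal when one is active,
 * or through a delayed write otherwise.
 */
#if 0
	u_int32_t *buffer;
	uintptr_t blockRef;

	if (ReadBitmapBlock(vcb, bit, &buffer, &blockRef) == noErr) {
		/* ... examine or modify the bitmap words in buffer ... */
		(void) ReleaseBitmapBlock(vcb, blockRef, false /* not dirty */);
	}
#endif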
1258
1259#if CONFIG_HFS_ALLOC_RBTREE
1260/*
1261 * ReleaseRBScanBitmapBlock is used to release struct bufs that were
1262 * created for use by the Red-Black tree generation code. We want to force
1263 * them to be purged out of the buffer cache ASAP, so we'll release them differently
 1264 * than in the ReleaseBitmapBlock case. Additionally, we know that we're only reading
1265 * the blocks, so we will never dirty them as part of the tree building scan.
1266 */
1267
1268static OSErr ReleaseRBScanBitmapBlock(struct buf *bp ) {
1269
1270 if (bp == NULL) {
1271 return (0);
1272 }
1273
1274 if (bp) {
1275 /* Mark the buffer invalid if it isn't locked, then release it */
1276 if ((buf_flags(bp) & B_LOCKED) == 0) {
1277 buf_markinvalid(bp);
1278 }
1279 buf_brelse(bp);
1280 }
1281
0b4e3aa0 1282 return (0);
6d2010ae
A
1283
1284
0b4e3aa0
A
1285}
1286
6d2010ae
A
1287#endif
1288
1c79356b
A
1289
1290/*
1291_______________________________________________________________________
1292
1293Routine: BlockAllocateContig
1294
1295Function: Allocate a contiguous group of allocation blocks. The
1296 allocation is all-or-nothing. The caller guarantees that
1297 there are enough free blocks (though they may not be
1298 contiguous, in which case this call will fail).
1299
1300Inputs:
1301 vcb Pointer to volume where space is to be allocated
1302 startingBlock Preferred first block for allocation
1303 minBlocks Minimum number of contiguous blocks to allocate
1304 maxBlocks Maximum number of contiguous blocks to allocate
55e303ae 1305 useMetaZone
1c79356b
A
1306
1307Outputs:
1308 actualStartBlock First block of range allocated, or 0 if error
1309 actualNumBlocks Number of blocks allocated, or 0 if error
1310_______________________________________________________________________
1311*/
1312static OSErr BlockAllocateContig(
1313 ExtendedVCB *vcb,
2d21ac55
A
1314 u_int32_t startingBlock,
1315 u_int32_t minBlocks,
1316 u_int32_t maxBlocks,
55e303ae 1317 Boolean useMetaZone,
2d21ac55
A
1318 u_int32_t *actualStartBlock,
1319 u_int32_t *actualNumBlocks)
1c79356b 1320{
1c79356b 1321
6d2010ae
A
1322#if CONFIG_HFS_ALLOC_RBTREE
1323 if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
1324 return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone,
1325 actualStartBlock, actualNumBlocks, 1);
1326 }
1327#endif
1328 return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks,
1329 maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
1330}
0b4e3aa0 1331
6d2010ae
A
1332/*
1333 * Variant of BlockAllocateContig that uses the original bitmap-searching logic
1334 */
1335
1336static OSErr BlockAllocateContigBitmap(
1337 ExtendedVCB *vcb,
1338 u_int32_t startingBlock,
1339 u_int32_t minBlocks,
1340 u_int32_t maxBlocks,
1341 Boolean useMetaZone,
1342 u_int32_t *actualStartBlock,
1343 u_int32_t *actualNumBlocks)
1344{
1345 OSErr err;
1346
1347 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1348 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
1349
1350 //
1351 // Find a contiguous group of blocks at least minBlocks long.
1352 // Determine the number of contiguous blocks available (up
1353 // to maxBlocks).
1354 //
1355
1356 /*
1357 * NOTE: If the only contiguous free extent of at least minBlocks
1358 * crosses startingBlock (i.e. starts before, ends after), then we
1359 * won't find it. Earlier versions *did* find this case by letting
1360 * the second search look past startingBlock by minBlocks. But
1361 * with the free extent cache, this can lead to duplicate entries
1362 * in the cache, causing the same blocks to be allocated twice.
0b4e3aa0 1363 */
2d21ac55 1364 err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
55e303ae 1365 maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
0b4e3aa0
A
1366 if (err == dskFulErr && startingBlock != 0) {
1367 /*
1368 * Constrain the endingBlock so we don't bother looking for ranges
1369 * that would overlap those found in the previous call.
1370 */
55e303ae
A
1371 err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
1372 useMetaZone, actualStartBlock, actualNumBlocks);
1c79356b 1373 }
1c79356b
A
1374 //
1375 // Now mark those blocks allocated.
1376 //
2d21ac55 1377 if (err == noErr)
6d2010ae
A
1378 err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
1379
1380 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1381 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
1382
1383 return err;
1384}
1385
1386#if CONFIG_HFS_ALLOC_RBTREE
1387/*
1388 * Variant of BlockAllocateContig that uses the newer red-black tree library
1389 * in order to manage free space extents. This will search the red-black tree
1390 * and return results in the same fashion as BlockAllocateContigBitmap.
1391 *
 1392 * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
1393 * as well as BlockAllocateContig. In order to determine when we should vend contiguous chunks over
1394 * locality-based-searches, we use the forceContig argument to determine who called us.
1395 */
1396
1397static OSErr BlockAllocateContigRBTree(
1398 ExtendedVCB *vcb,
1399 u_int32_t startingBlock,
1400 u_int32_t minBlocks,
1401 u_int32_t maxBlocks,
1402 Boolean useMetaZone,
1403 u_int32_t *actualStartBlock,
1404 u_int32_t *actualNumBlocks,
1405 u_int32_t forceContig)
1406{
1407 OSErr err;
1408 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1409 extent_node_t search_sentinel;
1410 extent_node_t *node = NULL;
1411 extent_node_t tempnode;
1412
1413 bzero (&tempnode, sizeof(extent_node_t));
1414
1415 /* Begin search at the end of the file, via startingBlock */
1416 memset (&search_sentinel, 0, sizeof(extent_node_t));
1417 search_sentinel.offset = startingBlock;
1418
1419 *actualStartBlock = 0;
1420 *actualNumBlocks = 0;
1421
1422 /*
 1423 * Find the first available extent that satisfies the allocation by searching
1424 * from the starting point and moving forward
1425 */
1426 node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
1427
1428 if (node) {
1429 *actualStartBlock = node->offset;
1430 *actualNumBlocks = node->length;
1431 }
1432
1433 /* If we managed to grab at least minBlocks of space, then we're done. */
1434
1435 if (*actualNumBlocks >= minBlocks) {
1436 if (*actualNumBlocks > maxBlocks) {
1437 *actualNumBlocks = maxBlocks;
1438 }
1439
1440
1441 /* Check to see if blocks are already marked as in-use */
1442 if (ALLOC_DEBUG) {
1443 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1444 if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
1445 printf("bad node: %p, offset %d, length %d\n", node, node->offset,node->length);
1446 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
1447 *actualStartBlock, *actualNumBlocks);
1448 }
1449 }
1450
1451 /*
1452 * BlockMarkAllocatedRBTree is responsible for removing the nodes
1453 * from the red-black tree after the bitmap has been updated on-disk.
1454 */
1455 err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
1456 if (err == noErr) {
1457
1458 if ( ALLOC_DEBUG ) {
1459 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1460 if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
1461 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
1462 *actualStartBlock, *actualNumBlocks);
1463 }
1464 check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
1465 }
1466
1467 return err;
1468 }
1469 }
1470
1471 /*
1472 * We may have failed to grow at the end of the file. We'll try to find
1473 * appropriate free extents, searching by size in the normal allocation zone.
1474 *
1475 * However, if we're allocating on behalf of a sparse device that hasn't explicitly
1476 * requested a contiguous chunk, then we try to search by offset, even if it
1477 * means fragmenting the file. We want all available entries starting
1478 * from the front of the disk to avoid creating new bandfiles. As a result,
1479 * we'll start by searching the offset tree rather than the normal length
1480 * tree. Note that this function can be invoked from BlockAllocateAny, in
1481 * which the minimum block size is 1 block, making it easy to succeed.
1482 */
1483 search_sentinel.offset = hfsmp->hfs_metazone_end;
1484 search_sentinel.length = minBlocks;
1485
1486 if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) {
1487 /* just start with the first offset node */
1488 node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
1489 }
1490 else {
1491 /*
1492 * Otherwise, start from the end of the metadata zone or our next allocation pointer,
1493 * and try to find the first chunk of size >= min.
1494 */
1495 node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
1496
1497 if (node == NULL) {
1498 extent_node_t *metaend_node;
1499 /*
1500 * Maybe there's a free extent coalesced with the space still in the metadata
1501 * zone. If there is, find it and allocate from the middle of it, starting at
1502 * the end of the metadata zone.
1503 *
1504 * If search_prev yields a result that is not offset == metazone_end, then that
1505 * means no node existed at that offset. If the previous node's offset + length crosses
1506 * the metazone boundary, then allocate from there. If it is too small to
1507 * cross the metazone boundary, then it is of no importance and we'd have to
1508 * report ENOSPC.
1509 */
1510 metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
1511
1512 if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) {
1513 u_int32_t node_end = metaend_node->offset + metaend_node->length;
1514 if (node_end > hfsmp->hfs_metazone_end) {
1515 u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end;
1516 if (modified_length >= minBlocks) {
1517 /*
1518 * Then we can allocate it. Fill in the contents into tempnode,
1519 * and BlockMarkAllocatedRBTree below will take care of the rest.
1520 */
1521 tempnode.offset = hfsmp->hfs_metazone_end;
1522 tempnode.length = MIN(minBlocks, node_end - tempnode.offset);
1523 node = &tempnode;
1524 }
1525 }
1526 }
1527 }
1528 }
1529
1530 /* If we can't find anything useful, search the metadata zone as a last resort. */
1531
1532 if ((!node) && useMetaZone) {
1533 search_sentinel.offset = 0;
1534 search_sentinel.length = minBlocks;
1535 node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
1536 }
1537
1538 /* If we found something useful, then go ahead and update the bitmap */
1539 if ((node) && (node->length >= minBlocks)) {
1540 *actualStartBlock = node->offset;
1541 if (node->length >= maxBlocks) {
1542 *actualNumBlocks = maxBlocks;
1543 }
1544 else {
1545 *actualNumBlocks = node->length;
1546 }
1547
1548 err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
1549
1550 if (err == noErr) {
1551 if ( ALLOC_DEBUG ) {
1552 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
1553 if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
1554 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
1555 *actualStartBlock, *actualNumBlocks);
1556 }
1557 check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);
1558 }
1559 }
1560 }
1561 else {
1562 int destroy_trees = 0;
1563 /*
1564 * TODO: Add High-water mark check here. If we couldn't find anything useful,
1565 * when do we tear down the tree? Or should the logic be in BlockAllocateContig??
1566 */
1567 if (destroy_trees) {
1568 DestroyTrees(VCBTOHFS(vcb));
1569 /* Reset the Free Ext Cache since we'll be using it now. */
1570 ResetVCBFreeExtCache(VCBTOHFS(vcb));
1571 }
1572
1573 if (ALLOC_DEBUG) {
1574 printf("HFS allocator: No space on FS (%s). Node %p Start %d Min %d, Max %d, Tree still alive.\n",
1575 hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks);
1576
1577 /* Dump the list ? */
1578 extent_tree_offset_print(&hfsmp->offset_tree);
1579
1580 printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n",
1581 hfsmp->vcbVN, minBlocks, maxBlocks);
1582
1583
1584
1585 }
1586 err = dskFulErr;
1587 }
1588
1589 if (err == noErr) {
1590 if (ALLOC_DEBUG) {
1591 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
1592 panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
1593 }
1594 }
1595 else {
1596 *actualStartBlock = 0;
1597 *actualNumBlocks = 0;
1598 }
1c79356b
A
1599
1600 return err;
6d2010ae 1601
1c79356b 1602}
6d2010ae
A
1603#endif
1604
1605
1c79356b 1606
1c79356b
A
1607/*
1608_______________________________________________________________________
1609
1610Routine: BlockAllocateAny
1611
1612Function: Allocate one or more allocation blocks. If there are fewer
1613 free blocks than requested, all free blocks will be
1614 allocated. The caller guarantees that there is at least
1615 one free block.
1616
1617Inputs:
1618 vcb Pointer to volume where space is to be allocated
1619 startingBlock Preferred first block for allocation
1620 endingBlock Last block to check + 1
1621 maxBlocks Maximum number of contiguous blocks to allocate
55e303ae 1622 useMetaZone
1c79356b
A
1623
1624Outputs:
1625 actualStartBlock First block of range allocated, or 0 if error
1626 actualNumBlocks Number of blocks allocated, or 0 if error
1627_______________________________________________________________________
1628*/
6d2010ae
A
1629
1630/*
1631 * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree
1632 * and BlockAllocateAnyBitmap, the latter of which uses the bitmap-scanning logic.
1633 */
1634
0b4e3aa0 1635static OSErr BlockAllocateAny(
1c79356b 1636 ExtendedVCB *vcb,
2d21ac55
A
1637 u_int32_t startingBlock,
1638 register u_int32_t endingBlock,
1639 u_int32_t maxBlocks,
55e303ae 1640 Boolean useMetaZone,
2d21ac55
A
1641 u_int32_t *actualStartBlock,
1642 u_int32_t *actualNumBlocks)
1c79356b 1643{
6d2010ae
A
1644
1645#if CONFIG_HFS_ALLOC_RBTREE
1646 if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
1647 return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
1648 }
1649#endif
1650 return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
1651
1652}
1653
1654
1655#if CONFIG_HFS_ALLOC_RBTREE
1656/*
1657 * BlockAllocateAnyRBTree finds one or more allocation blocks by using
1658 * the red-black allocation tree to figure out where the free ranges are.
1659 * This function is typically used as a last resort because we were unable to
1660 * find the right ranges. Outputs are the same as BlockAllocateAnyBitmap.
1661 */
1662static OSErr BlockAllocateAnyRBTree(
1663 ExtendedVCB *vcb,
1664 u_int32_t startingBlock,
1665 u_int32_t maxBlocks,
1666 Boolean useMetaZone,
1667 u_int32_t *actualStartBlock,
1668 u_int32_t *actualNumBlocks)
1669{
1670 OSErr err;
1671
1672 /*
1673 * BlockAllocateContig
1674 */
1675 /* If we're using the red-black tree, try searching at the specified offsets. */
1676 err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone,
1677 actualStartBlock, actualNumBlocks, 0);
1678 return err;
1679
1680}
1681#endif
1682
1683/*
1684 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
1685 * where the free allocation blocks are. Inputs and outputs are the same as for
1686 * BlockAllocateAny and BlockAllocateAnyRBTree
1687 */
1688
1689static OSErr BlockAllocateAnyBitmap(
1690 ExtendedVCB *vcb,
1691 u_int32_t startingBlock,
1692 register u_int32_t endingBlock,
1693 u_int32_t maxBlocks,
1694 Boolean useMetaZone,
1695 u_int32_t *actualStartBlock,
1696 u_int32_t *actualNumBlocks)
1697{
1c79356b 1698 OSErr err;
2d21ac55
A
1699 register u_int32_t block; // current block number
1700 register u_int32_t currentWord; // Pointer to current word within bitmap block
1701 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
1702 register u_int32_t wordsLeft; // Number of words left in this bitmap block
1703 u_int32_t *buffer = NULL;
1704 u_int32_t *currCache = NULL;
b0d623f7 1705 uintptr_t blockRef;
2d21ac55
A
1706 u_int32_t bitsPerBlock;
1707 u_int32_t wordsPerBlock;
0b4e3aa0 1708 Boolean dirty = false;
b4c24cb9 1709 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1c79356b 1710
6d2010ae
A
1711 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1712 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
1713
2d21ac55
A
1714 /*
1715 * When we're skipping the metadata zone and the start/end
1716 * range overlaps with the metadata zone then adjust the
1717 * start to be outside of the metadata zone. If the range
1718 * is entirely inside the metadata zone then we can deny the
1719 * request (dskFulErr).
1720 */
1721 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
1722 if (startingBlock <= vcb->hfs_metazone_end) {
1723 if (endingBlock > (vcb->hfs_metazone_end + 2))
1724 startingBlock = vcb->hfs_metazone_end + 1;
1725 else {
1726 err = dskFulErr;
1727 goto Exit;
1728 }
1729 }
1730 }
1731
1c79356b
A
1732 // Since this routine doesn't wrap around, the request can't exceed the remaining range
1733 if (maxBlocks > (endingBlock - startingBlock)) {
1734 maxBlocks = endingBlock - startingBlock;
1735 }
1736
1c79356b
A
1737 //
1738 // Pre-read the first bitmap block
1739 //
55e303ae 1740 err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
1c79356b 1741 if (err != noErr) goto Exit;
55e303ae 1742 buffer = currCache;
1c79356b
A
1743
1744 //
1745 // Set up the current position within the block
1746 //
1747 {
2d21ac55 1748 u_int32_t wordIndexInBlock;
0b4e3aa0
A
1749
1750 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
1751 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 1752
0b4e3aa0 1753 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1c79356b 1754 buffer += wordIndexInBlock;
0b4e3aa0 1755 wordsLeft = wordsPerBlock - wordIndexInBlock;
1c79356b
A
1756 currentWord = SWAP_BE32 (*buffer);
1757 bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
1758 }
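	/*
	 * Worked example (illustrative, not from the original source): with a
	 * 4 KiB vcbVBMIOSize, bitsPerBlock = 32768 and wordsPerBlock = 1024.
	 * For startingBlock 70000, 70000 & 32767 = 4464, so wordIndexInBlock =
	 * 4464 / 32 = 139 and the bit within that word is 70000 & 31 = 16,
	 * giving bitMask = 0x80000000 >> 16 = 0x00008000.  Note that on-disk
	 * bitmap words are big-endian, hence the SWAP_BE32 on every load/store.
	 */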
1759
1760 //
1761 // Find the first unallocated block
1762 //
1763 block=startingBlock;
1764 while (block < endingBlock) {
1765 if ((currentWord & bitMask) == 0)
1766 break;
55e303ae 1767
1c79356b
A
1768 // Next bit
1769 ++block;
1770 bitMask >>= 1;
1771 if (bitMask == 0) {
1772 // Next word
1773 bitMask = kHighBitInWordMask;
1774 ++buffer;
55e303ae 1775
1c79356b
A
1776 if (--wordsLeft == 0) {
1777 // Next block
55e303ae 1778 buffer = currCache = NULL;
0b4e3aa0
A
1779 err = ReleaseBitmapBlock(vcb, blockRef, false);
1780 if (err != noErr) goto Exit;
1781
55e303ae
A
1782 /*
1783 * Skip over metadata blocks.
1784 */
1785 if (!useMetaZone) {
1786 block = NextBitmapBlock(vcb, block);
55e303ae 1787 }
2d21ac55
A
1788 if (block >= endingBlock) {
1789 err = dskFulErr;
1790 goto Exit;
1791 }
1792
55e303ae 1793 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
1c79356b 1794 if (err != noErr) goto Exit;
55e303ae 1795 buffer = currCache;
1c79356b 1796
0b4e3aa0 1797 wordsLeft = wordsPerBlock;
1c79356b 1798 }
1c79356b
A
1799 currentWord = SWAP_BE32 (*buffer);
1800 }
1801 }
1802
1803 // Did we get to the end of the bitmap before finding a free block?
1804 // If so, then we couldn't allocate anything.
55e303ae 1805 if (block >= endingBlock) {
1c79356b
A
1806 err = dskFulErr;
1807 goto Exit;
1808 }
1809
1810 // Return the first block in the allocated range
1811 *actualStartBlock = block;
0b4e3aa0 1812 dirty = true;
1c79356b
A
1813
1814 // If we could get the desired number of blocks before hitting endingBlock,
1815 // then adjust endingBlock so we won't keep looking. Ideally, the comparison
1816 // would be (block + maxBlocks) < endingBlock, but that could overflow. The
1817 // comparison below yields identical results, but without overflow.
1818 if (block < (endingBlock-maxBlocks)) {
1819 endingBlock = block + maxBlocks; // if we get this far, we've found enough
1820 }
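	// (Illustrative note: maxBlocks was already clamped above to
	// endingBlock - startingBlock, so endingBlock - maxBlocks cannot
	// underflow, whereas block + maxBlocks could wrap past 2^32 on a
	// volume whose bitmap spans the top of the 32-bit block range.)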
1821
b4c24cb9
A
1822 // XXXdbg
1823 if (hfsmp->jnl) {
1824 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
1825 }
1826
1c79356b
A
1827 //
1828 // Allocate all of the consecutive blocks
1829 //
1830 while ((currentWord & bitMask) == 0) {
1831 // Allocate this block
1832 currentWord |= bitMask;
1833
1834 // Move to the next block. If no more, then exit.
1835 ++block;
1836 if (block == endingBlock)
1837 break;
1838
1839 // Next bit
1840 bitMask >>= 1;
1841 if (bitMask == 0) {
1842 *buffer = SWAP_BE32 (currentWord); // update value in bitmap
1843
1844 // Next word
1845 bitMask = kHighBitInWordMask;
1846 ++buffer;
1847
1848 if (--wordsLeft == 0) {
1849 // Next block
55e303ae 1850 buffer = currCache = NULL;
0b4e3aa0
A
1851 err = ReleaseBitmapBlock(vcb, blockRef, true);
1852 if (err != noErr) goto Exit;
1853
55e303ae
A
1854 /*
1855 * Skip over metadata blocks.
1856 */
1857 if (!useMetaZone) {
2d21ac55 1858 u_int32_t nextBlock;
55e303ae
A
1859
1860 nextBlock = NextBitmapBlock(vcb, block);
1861 if (nextBlock != block) {
1862 goto Exit; /* allocation gap, so stop */
1863 }
1864 }
1865
1866 err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
1c79356b 1867 if (err != noErr) goto Exit;
55e303ae 1868 buffer = currCache;
1c79356b 1869
b4c24cb9
A
1870 // XXXdbg
1871 if (hfsmp->jnl) {
1872 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
1873 }
1874
0b4e3aa0 1875 wordsLeft = wordsPerBlock;
1c79356b
A
1876 }
1877
1878 currentWord = SWAP_BE32 (*buffer);
1879 }
1880 }
1881 *buffer = SWAP_BE32 (currentWord); // update the last change
1882
1883Exit:
1884 if (err == noErr) {
1885 *actualNumBlocks = block - *actualStartBlock;
55e303ae 1886
060df5ea
A
1887 // sanity check
1888 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
1889 panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
1890 }
6d2010ae
A
1891
1892 /*
1893 * Beware!
1894 * Because this function directly manipulates the bitmap to mark the
1895 * blocks it came across as allocated, we must inform the journal (and
1896 * subsequently, the journal's trim list) that we are allocating these
1897 * blocks, just like in BlockMarkAllocatedInternal. hfs_unmap_alloc_extent
1898 * and the functions it calls will serialize behind the journal trim list lock
1899 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to
1900 * us manipulating the trim list, or we get there first and successfully remove
1901 * these bitmap blocks before the TRIM happens.
1902 */
1903 hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks);
1c79356b
A
1904 }
1905 else {
1906 *actualStartBlock = 0;
1907 *actualNumBlocks = 0;
1908 }
1909
0b4e3aa0
A
1910 if (currCache)
1911 (void) ReleaseBitmapBlock(vcb, blockRef, dirty);
1c79356b 1912
6d2010ae
A
1913 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1914 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
1915
1c79356b
A
1916 return err;
1917}
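/*
 * A minimal user-space sketch of the scan performed above (assumptions:
 * the flat, host-order, in-memory bitmap and the helper name
 * first_clear_bit are illustrative only; the real routine works through
 * cached, journaled, big-endian bitmap blocks and skips the metadata zone).
 */
#if 0
#include <stdint.h>

/* Return the index of the first clear bit in [start, end), or end if none. */
static uint32_t first_clear_bit(const uint32_t *bitmap, uint32_t start, uint32_t end)
{
	uint32_t b;

	for (b = start; b < end; b++) {
		uint32_t word = bitmap[b / 32];            /* 32 blocks per word */
		uint32_t mask = 0x80000000u >> (b % 32);   /* MSB = lowest block */
		if ((word & mask) == 0)
			return b;                          /* found a free block */
	}
	return end;                                        /* bitmap exhausted */
}
#endif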
1918
1919
0b4e3aa0
A
1920/*
1921_______________________________________________________________________
1922
1923Routine: BlockAllocateKnown
1924
1925Function: Try to allocate space from known free space in the free
1926 extent cache.
1927
1928Inputs:
1929 vcb Pointer to volume where space is to be allocated
1930 maxBlocks Maximum number of contiguous blocks to allocate
1931
1932Outputs:
1933 actualStartBlock First block of range allocated, or 0 if error
1934 actualNumBlocks Number of blocks allocated, or 0 if error
1935
1936Returns:
1937 dskFulErr Free extent cache is empty
1938_______________________________________________________________________
1939*/
b0d623f7 1940
0b4e3aa0
A
1941static OSErr BlockAllocateKnown(
1942 ExtendedVCB *vcb,
2d21ac55
A
1943 u_int32_t maxBlocks,
1944 u_int32_t *actualStartBlock,
1945 u_int32_t *actualNumBlocks)
0b4e3aa0 1946{
2d21ac55
A
1947 OSErr err;
1948 u_int32_t i;
1949 u_int32_t foundBlocks;
1950 u_int32_t newStartBlock, newBlockCount;
b0d623f7 1951
6d2010ae
A
1952 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1953 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
1954
d1ecb069 1955 HFS_MOUNT_LOCK(vcb, TRUE);
6d2010ae
A
1956 lck_spin_lock(&vcb->vcbFreeExtLock);
1957 if ((hfs_isrbtree_active(vcb) == true) ||
1958 vcb->vcbFreeExtCnt == 0 ||
d1ecb069 1959 vcb->vcbFreeExt[0].blockCount == 0) {
6d2010ae 1960 lck_spin_unlock(&vcb->vcbFreeExtLock);
d1ecb069 1961 HFS_MOUNT_UNLOCK(vcb, TRUE);
6d2010ae
A
1962 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
1963 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
0b4e3aa0 1964 return dskFulErr;
d1ecb069 1965 }
6d2010ae 1966 lck_spin_unlock(&vcb->vcbFreeExtLock);
d1ecb069 1967 HFS_MOUNT_UNLOCK(vcb, TRUE);
0b4e3aa0 1968
6d2010ae
A
1969 lck_spin_lock(&vcb->vcbFreeExtLock);
1970
0b4e3aa0
A
1971 // Just grab up to maxBlocks of the first (largest) free extent.
1972 *actualStartBlock = vcb->vcbFreeExt[0].startBlock;
1973 foundBlocks = vcb->vcbFreeExt[0].blockCount;
1974 if (foundBlocks > maxBlocks)
1975 foundBlocks = maxBlocks;
1976 *actualNumBlocks = foundBlocks;
1977
b0d623f7
A
1978 if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1979 // since sparse volumes keep the free extent list sorted by starting
1980 // block number, the list won't get re-ordered, it may only shrink
1981 //
1982 vcb->vcbFreeExt[0].startBlock += foundBlocks;
1983 vcb->vcbFreeExt[0].blockCount -= foundBlocks;
1984 if (vcb->vcbFreeExt[0].blockCount == 0) {
1985 for(i=1; i < vcb->vcbFreeExtCnt; i++) {
1986 vcb->vcbFreeExt[i-1] = vcb->vcbFreeExt[i];
1987 }
1988 vcb->vcbFreeExtCnt--;
1989 }
1990
1991 goto done;
1992 }
1993
0b4e3aa0
A
1994 // Adjust the start and length of that extent.
1995 newStartBlock = vcb->vcbFreeExt[0].startBlock + foundBlocks;
1996 newBlockCount = vcb->vcbFreeExt[0].blockCount - foundBlocks;
b0d623f7 1997
0b4e3aa0
A
1998
1999 // The first extent might not be the largest anymore. Bubble up any
2000 // (now larger) extents to the top of the list.
2001 for (i=1; i<vcb->vcbFreeExtCnt; ++i)
2002 {
2003 if (vcb->vcbFreeExt[i].blockCount > newBlockCount)
2004 {
2005 vcb->vcbFreeExt[i-1].startBlock = vcb->vcbFreeExt[i].startBlock;
2006 vcb->vcbFreeExt[i-1].blockCount = vcb->vcbFreeExt[i].blockCount;
2007 }
2008 else
2009 {
2010 break;
2011 }
2012 }
2013
2014 // If this is now the smallest known free extent, then it might be smaller than
2015 // other extents we didn't keep track of. So, just forget about this extent.
2016 // After the previous loop, (i-1) is the index of the extent we just allocated from.
b0d623f7 2017 if (newBlockCount == 0)
0b4e3aa0 2018 {
b0d623f7 2019 // then just reduce the number of free extents since this guy got deleted
0b4e3aa0
A
2020 --vcb->vcbFreeExtCnt;
2021 }
2022 else
2023 {
2024 // It's not the smallest, so store it in its proper place
2025 vcb->vcbFreeExt[i-1].startBlock = newStartBlock;
2026 vcb->vcbFreeExt[i-1].blockCount = newBlockCount;
2027 }
b0d623f7
A
2028
2029done:
6d2010ae 2030 lck_spin_unlock(&vcb->vcbFreeExtLock);
55e303ae 2031 // sanity check
2d21ac55 2032 if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
0c530ab8 2033 {
2d21ac55
A
2034 printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
2035 hfs_mark_volume_inconsistent(vcb);
6601e61a
A
2036 *actualStartBlock = 0;
2037 *actualNumBlocks = 0;
2038 err = EIO;
2d21ac55
A
2039 }
2040 else
6601e61a
A
2041 {
2042 //
2043 // Now mark the found extent in the bitmap
2044 //
6d2010ae 2045 err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
6601e61a 2046 }
2d21ac55 2047
6d2010ae
A
2048 sanity_check_free_ext(vcb, 0);
2049
2050 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2051 KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
b0d623f7 2052
2d21ac55 2053 return err;
0b4e3aa0
A
2054}
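/*
 * A sketch of the cache maintenance above (illustrative; struct ext and
 * take_from_largest are hypothetical names, and the array is assumed to be
 * kept sorted largest-first, as vcbFreeExt is on non-sparse volumes; sparse
 * volumes instead keep the list sorted by start block and only shrink it).
 */
#if 0
#include <stdint.h>

struct ext { uint32_t start, count; };

/* Consume 'used' blocks from the largest extent and re-sort by size. */
static void take_from_largest(struct ext *list, uint32_t *n, uint32_t used)
{
	uint32_t newStart = list[0].start + used;
	uint32_t newCount = list[0].count - used;
	uint32_t i;

	/* Bubble up every extent that is now larger than the shrunken one. */
	for (i = 1; i < *n && list[i].count > newCount; i++)
		list[i - 1] = list[i];

	if (newCount == 0)
		--*n;                        /* fully consumed; forget it */
	else {
		list[i - 1].start = newStart;
		list[i - 1].count = newCount;
	}
}
#endif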
2055
6d2010ae
A
2056/*
2057 * BlockMarkAllocated
2058 *
2059 * This is a wrapper function around the internal calls which will actually mark the blocks
2060 * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do
2061 * this logic here to avoid callers having to deal with whether or not the red-black tree
2062 * is enabled.
2063 */
2064
2065
2066OSErr BlockMarkAllocated(
2067 ExtendedVCB *vcb,
2068 u_int32_t startingBlock,
2069 register u_int32_t numBlocks)
2070{
2071 struct hfsmount *hfsmp;
2072
2073 hfsmp = VCBTOHFS(vcb);
2074#if CONFIG_HFS_ALLOC_RBTREE
2075 if (hfs_isrbtree_active(hfsmp)) {
2076 int err;
2077
2078 if ((startingBlock >= hfsmp->offset_block_end) &&
2079 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
2080 /*
2081 * We're manipulating a portion of the bitmap that is not controlled by the
2082 * red-black tree. Just update the bitmap and don't bother manipulating the tree
2083 */
2084 goto justbitmap;
2085 }
2086
2087 err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks);
2088 if (err == noErr) {
2089 if ( ALLOC_DEBUG ) {
2090 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
2091 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2092 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
2093 startingBlock, numBlocks);
2094 }
2095 check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC);
2096 }
2097 }
2098 return err;
2099
2100 }
2101justbitmap:
2102#endif
2103
2104 return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
2105
2106}
2107
0b4e3aa0 2108
1c79356b
A
2109
2110/*
2111_______________________________________________________________________
2112
6d2010ae 2113Routine: BlockMarkAllocatedInternal
1c79356b
A
2114
2115Function: Mark a contiguous group of blocks as allocated (set in the
2116 bitmap). It assumes those bits are currently marked
6d2010ae
A
2117 deallocated (clear in the bitmap). Note that this function
2118 must be called regardless of whether or not the bitmap or
2119 tree-based allocator is used, as all allocations must correctly
2120 be marked on-disk. If the tree-based approach is running, then
2121 this will be done before the node is removed from the tree.
1c79356b
A
2122
2123Inputs:
2124 vcb Pointer to volume where space is to be allocated
2125 startingBlock First block number to mark as allocated
2126 numBlocks Number of blocks to mark as allocated
2127_______________________________________________________________________
2128*/
6d2010ae
A
2129static
2130OSErr BlockMarkAllocatedInternal (
1c79356b 2131 ExtendedVCB *vcb,
2d21ac55
A
2132 u_int32_t startingBlock,
2133 register u_int32_t numBlocks)
1c79356b
A
2134{
2135 OSErr err;
2d21ac55
A
2136 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2137 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2138 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2139 u_int32_t firstBit; // Bit index within word of first bit to allocate
2140 u_int32_t numBits; // Number of bits in word to allocate
2141 u_int32_t *buffer = NULL;
b0d623f7 2142 uintptr_t blockRef;
2d21ac55
A
2143 u_int32_t bitsPerBlock;
2144 u_int32_t wordsPerBlock;
b4c24cb9
A
2145 // XXXdbg
2146 struct hfsmount *hfsmp = VCBTOHFS(vcb);
1c79356b 2147
6d2010ae
A
2148 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2149 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
2150
2151 hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
060df5ea 2152
1c79356b
A
2153 //
2154 // Pre-read the bitmap block containing the first word of allocation
2155 //
2156
0b4e3aa0 2157 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
1c79356b 2158 if (err != noErr) goto Exit;
1c79356b
A
2159 //
2160 // Initialize currentWord, and wordsLeft.
2161 //
2162 {
2d21ac55 2163 u_int32_t wordIndexInBlock;
0b4e3aa0
A
2164
2165 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2166 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 2167
0b4e3aa0 2168 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1c79356b 2169 currentWord = buffer + wordIndexInBlock;
0b4e3aa0 2170 wordsLeft = wordsPerBlock - wordIndexInBlock;
1c79356b
A
2171 }
2172
b4c24cb9
A
2173 // XXXdbg
2174 if (hfsmp->jnl) {
2175 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2176 }
2177
1c79356b
A
2178 //
2179 // If the first block to allocate doesn't start on a word
2180 // boundary in the bitmap, then treat that first word
2181 // specially.
2182 //
2183
2184 firstBit = startingBlock % kBitsPerWord;
2185 if (firstBit != 0) {
2186 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2187 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2188 if (numBits > numBlocks) {
2189 numBits = numBlocks; // entire allocation is inside this one word
2190 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2191 }
2192#if DEBUG_BUILD
2193 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
6d2010ae 2194 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
1c79356b
A
2195 }
2196#endif
2197 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2198 numBlocks -= numBits; // adjust number of blocks left to allocate
2199
2200 ++currentWord; // move to next word
2201 --wordsLeft; // one less word left in this block
2202 }
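	/*
	 * Illustrative example (not from the original source): firstBit = 5,
	 * numBlocks = 3 within a single word gives
	 *   bitMask = 0xFFFFFFFF >> 5            = 0x07FFFFFF
	 *   bitMask &= ~(0xFFFFFFFF >> (5 + 3))  = 0x07000000
	 * i.e. only bits 5..7 (counting the high bit as bit 0) are set.
	 */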
2203
2204 //
2205 // Allocate whole words (32 blocks) at a time.
2206 //
2207
2208 bitMask = kAllBitsSetInWord; // put this in a register for 68K
2209 while (numBlocks >= kBitsPerWord) {
2210 if (wordsLeft == 0) {
2211 // Read in the next bitmap block
0b4e3aa0 2212 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
1c79356b 2213
1c79356b 2214 buffer = NULL;
0b4e3aa0 2215 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2216 if (err != noErr) goto Exit;
0b4e3aa0
A
2217
2218 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2219 if (err != noErr) goto Exit;
2220
b4c24cb9
A
2221 // XXXdbg
2222 if (hfsmp->jnl) {
2223 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2224 }
2225
1c79356b
A
2226 // Readjust currentWord and wordsLeft
2227 currentWord = buffer;
0b4e3aa0 2228 wordsLeft = wordsPerBlock;
1c79356b
A
2229 }
2230#if DEBUG_BUILD
2231 if (*currentWord != 0) {
6d2010ae 2232 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
1c79356b
A
2233 }
2234#endif
2235 *currentWord = SWAP_BE32 (bitMask);
2236 numBlocks -= kBitsPerWord;
2237
2238 ++currentWord; // move to next word
2239 --wordsLeft; // one less word left in this block
2240 }
2241
2242 //
2243 // Allocate any remaining blocks.
2244 //
2245
2246 if (numBlocks != 0) {
2247 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2248 if (wordsLeft == 0) {
2249 // Read in the next bitmap block
0b4e3aa0 2250 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
1c79356b 2251
1c79356b 2252 buffer = NULL;
0b4e3aa0 2253 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2254 if (err != noErr) goto Exit;
0b4e3aa0
A
2255
2256 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2257 if (err != noErr) goto Exit;
2258
b4c24cb9
A
2259 // XXXdbg
2260 if (hfsmp->jnl) {
2261 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2262 }
2263
1c79356b
A
2264 // Readjust currentWord and wordsLeft
2265 currentWord = buffer;
0b4e3aa0 2266 wordsLeft = wordsPerBlock;
1c79356b
A
2267 }
2268#if DEBUG_BUILD
2269 if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
6d2010ae 2270 panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
1c79356b
A
2271 }
2272#endif
2273 *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap
2274
2275 // No need to update currentWord or wordsLeft
2276 }
2277
2278Exit:
2279
0b4e3aa0
A
2280 if (buffer)
2281 (void)ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2282
6d2010ae
A
2283 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2284 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
2285
1c79356b
A
2286 return err;
2287}
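/*
 * A compact user-space sketch of the three-phase structure above (partial
 * head word, whole words, partial tail word). The flat in-memory bitmap
 * and the name set_run are assumptions for illustration; the real routine
 * additionally swaps byte order, journals each bitmap block, and re-reads
 * the buffer as it crosses bitmap-block boundaries.
 */
#if 0
#include <stdint.h>

static void set_run(uint32_t *bm, uint32_t start, uint32_t n)
{
	uint32_t firstBit = start % 32;
	uint32_t *w = bm + start / 32;

	if (firstBit != 0) {                      /* head: partial first word */
		uint32_t bits = 32 - firstBit;
		uint32_t mask = 0xFFFFFFFFu >> firstBit;
		if (bits > n) {                   /* run ends inside this word */
			bits = n;
			mask &= ~(0xFFFFFFFFu >> (firstBit + bits));
		}
		*w++ |= mask;
		n -= bits;
	}
	while (n >= 32) {                         /* body: whole words */
		*w++ = 0xFFFFFFFFu;
		n -= 32;
	}
	if (n != 0)                               /* tail: first n bits */
		*w |= ~(0xFFFFFFFFu >> n);
}
#endif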
2288
6d2010ae 2289#if CONFIG_HFS_ALLOC_RBTREE
1c79356b 2290/*
6d2010ae
A
2291 * This is a wrapper function around BlockMarkAllocatedInternal. This function is
2292 * called when the RB Tree-based allocator needs to mark a block as in-use.
2293 * It should acquire any locks that the plain bitmap allocator would not
2294 * normally need, then call BlockMarkAllocatedInternal. Once
2295 * the on-disk data structures are updated properly, then this will remove the
2296 * appropriate node from the tree.
2297 */
1c79356b 2298
6d2010ae 2299static OSErr BlockMarkAllocatedRBTree(
1c79356b 2300 ExtendedVCB *vcb,
6d2010ae
A
2301 u_int32_t startingBlock,
2302 u_int32_t numBlocks)
1c79356b 2303{
6d2010ae
A
2304 OSErr err;
2305 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2306 int rb_err = 0;
1c79356b 2307
6d2010ae
A
2308
2309 if (ALLOC_DEBUG) {
2310 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
2311 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2312 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
2313 startingBlock, numBlocks);
2314 }
2315 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
b4c24cb9 2316 }
6d2010ae
A
2317
2318 err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks);
2319
2320 if (err == noErr) {
b4c24cb9 2321
6d2010ae
A
2322 if (ALLOC_DEBUG) {
2323 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2324 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n",
2325 startingBlock, numBlocks);
2326 }
2327 }
2328
2329 /*
2330 * Mark the blocks in the offset tree.
2331 */
2332 rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock);
2333 if (rb_err) {
2334 if (ALLOC_DEBUG) {
2335 printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err);
2336 }
2337
2338 /*
2339 * We may have been called through the BlockMarkAllocated interface, in which case
2340 * the requested extent need not begin at the start of a free node. Check here
2341 * whether the specified extent is free, and if so, find the node containing it.
2342 */
2343 extent_node_t *node = NULL;
2344 extent_node_t search_sentinel;
2345 search_sentinel.offset = startingBlock;
2346
2347 node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
2348
2349 if (node) {
2350 rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock);
2351 }
2352
2353 if (ALLOC_DEBUG) {
2354 if (rb_err) {
2355 printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err);
2356 }
2357 }
2358 }
2359 if (ALLOC_DEBUG) {
2360 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
2361 }
2362 }
2363
2364 /*
2365 * If we encountered a red-black tree error, for now, we immediately back off and force
2366 * destruction of rb-tree. Set the persistent error-detected bit in the mount point.
2367 * That will ensure that even if we reach a low-water-mark in the future we will still
2368 * not allow the rb-tree to be used. On next mount, we will force a re-construction from
2369 * on-disk state. As a fallback, we will now resort to the bitmap-scanning behavior.
2370 */
2371 if (rb_err) {
2372 /* Mark RB-Trees with error */
2373 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
2374 DestroyTrees(hfsmp);
2375 /* Reset the Free Ext Cache since we'll be using it now. */
2376 ResetVCBFreeExtCache(hfsmp);
2377 printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n");
2378 }
2379
2380 return err;
2381}
2382#endif
2383
2384
2385
2386/*
2387 * BlockMarkFree
2388 *
2389 * This is a wrapper function around the internal calls which will actually mark the blocks
2390 * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do
2391 * this logic here to avoid callers having to deal with whether or not the red-black tree
2392 * is enabled.
2393 *
2394 */
2395OSErr BlockMarkFree(
2396 ExtendedVCB *vcb,
2397 u_int32_t startingBlock,
2398 register u_int32_t numBlocks)
2399{
2400 struct hfsmount *hfsmp;
2401 hfsmp = VCBTOHFS(vcb);
2402#if CONFIG_HFS_ALLOC_RBTREE
2403 if (hfs_isrbtree_active(hfsmp)) {
2404 int err;
2405
2406 if ((startingBlock >= hfsmp->offset_block_end) &&
2407 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
2408 /*
2409 * We're manipulating a portion of the bitmap that is not controlled by the
2410 * red-black tree. Just update the bitmap and don't bother manipulating the tree
2411 */
2412 goto justbitmap;
2413 }
2414
2415 err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks);
2416 if (err == noErr) {
2417 if ( ALLOC_DEBUG ) {
2418 REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
2419 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2420 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n",
2421 startingBlock, numBlocks);
2422 }
2423 check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE);
2424 }
2425 }
2426 return err;
2427 }
2428justbitmap:
2429#endif
2430 return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2431
2432}
2433
2434
2435/*
2436 * BlockMarkFreeUnused
2437 *
2438 * Scan the bitmap block beyond end of current file system for bits
2439 * that are marked as used. If any of the bits are marked as used,
2440 * this function marks them free.
2441 *
2442 * Note: This was specifically written to mark all bits beyond
2443 * end of current file system during hfs_extendfs(), which makes
2444 * sure that all the new blocks added to the file system are
2445 * marked as free. We expect that all the blocks beyond end of
2446 * current file system are always marked as free, but there might
2447 * be cases where some are marked as used. This function assumes that
2448 * the number of blocks incorrectly marked as used is relatively
2449 * small, otherwise this can overflow journal transaction size
2450 * on certain file system configurations (example, large unused
2451 * bitmap with relatively small journal).
2452 *
2453 * Input:
2454 * startingBlock: First block of the range to mark unused
2455 * numBlocks: Number of blocks in the range to mark unused
2456 *
2457 * Returns: zero on success, non-zero on error.
2458 */
2459OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks)
2460{
2461 int error = 0;
2462 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2463 u_int32_t curNumBlocks;
2464 u_int32_t bitsPerBlock;
2465 u_int32_t lastBit;
2466
2467 /* Use the optimal bitmap I/O size instead of bitmap block size */
2468 bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte;
2469
2470 /*
2471 * First clear any bits that are not aligned to a bitmap block boundary
2472 *
2473 * Calculate the first bit in the bitmap block next to
2474 * the bitmap block containing the bit for startingBlock.
2475 * Using this value, we calculate the total number of
2476 * bits to be marked unused from startingBlock to the
2477 * end of bitmap block containing startingBlock.
2478 */
2479 lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
2480 curNumBlocks = lastBit - startingBlock;
2481 if (curNumBlocks > numBlocks) {
2482 curNumBlocks = numBlocks;
2483 }
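	/*
	 * Illustrative example (not from the original source): with
	 * bitsPerBlock = 32768 and startingBlock = 70000, lastBit rounds up
	 * to 3 * 32768 = 98304, so this first pass covers up to the 28304
	 * blocks remaining in startingBlock's bitmap block.
	 */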
2484 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2485 if (error) {
2486 return error;
2487 }
2488 startingBlock += curNumBlocks;
2489 numBlocks -= curNumBlocks;
2490
2491 /*
2492 * Check a full bitmap block for any 'used' bit. If any bit is used,
2493 * mark all the bits only in that bitmap block as free. This ensures
2494 * that we do not write unmodified bitmap blocks and do not
2495 * overwhelm the journal.
2496 *
2497 * The code starts by checking one full bitmap block at a time, and
2498 * marks an entire bitmap block as free only if some bit in that bitmap
2499 * block is marked as used. In the end, it handles the last bitmap
2500 * block which might be partially full by only checking till the
2501 * caller-specified last bit and if any bit is set, only mark that
2502 * range as free.
2503 */
2504 while (numBlocks) {
2505 if (numBlocks >= bitsPerBlock) {
2506 curNumBlocks = bitsPerBlock;
2507 } else {
2508 curNumBlocks = numBlocks;
2509 }
2510 if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
2511 error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
2512 if (error) {
2513 return error;
2514 }
2515 }
2516 startingBlock += curNumBlocks;
2517 numBlocks -= curNumBlocks;
2518 }
2519
2520 return error;
2521}
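/*
 * A sketch of the chunked strategy above (illustrative; mark_free and
 * any_allocated are hypothetical stand-ins for BlockMarkFreeInternal and
 * hfs_isallocated): align to a bitmap-block boundary, then touch a chunk
 * only when something in it is actually marked used, keeping each journal
 * transaction small.
 */
#if 0
#include <stdint.h>

extern int mark_free(uint32_t start, uint32_t n);        /* hypothetical */
extern int any_allocated(uint32_t start, uint32_t n);    /* hypothetical */

static int free_unused(uint32_t start, uint32_t n, uint32_t bitsPerChunk)
{
	/* head: up to the next chunk boundary */
	uint32_t chunk = ((start + bitsPerChunk - 1) / bitsPerChunk) * bitsPerChunk - start;
	if (chunk > n)
		chunk = n;
	if (chunk != 0 && mark_free(start, chunk))
		return -1;
	start += chunk;
	n -= chunk;

	/* body and tail: one chunk at a time, skipping already-clear chunks */
	while (n != 0) {
		chunk = (n >= bitsPerChunk) ? bitsPerChunk : n;
		if (any_allocated(start, chunk) && mark_free(start, chunk))
			return -1;
		start += chunk;
		n -= chunk;
	}
	return 0;
}
#endif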
2522
2523/*
2524_______________________________________________________________________
2525
2526Routine: BlockMarkFreeInternal
2527
2528Function: Mark a contiguous group of blocks as free (clear in the
2529 bitmap). It assumes those bits are currently marked
2530 allocated (set in the bitmap).
2531
2532Inputs:
2533 vcb Pointer to volume where space is to be freed
2534 startingBlock First block number to mark as freed
2535 numBlocks Number of blocks to mark as freed
2536 do_validate If true, validate the blocks being deallocated:
2537 check that they are within totalBlocks for the
2538 current volume and that they were actually
2539 allocated before they are marked free.
2540_______________________________________________________________________
2541*/
2542static
2543OSErr BlockMarkFreeInternal(
2544 ExtendedVCB *vcb,
2545 u_int32_t startingBlock_in,
2546 register u_int32_t numBlocks_in,
2547 Boolean do_validate)
2548{
2549 OSErr err;
2550 u_int32_t startingBlock = startingBlock_in;
2551 u_int32_t numBlocks = numBlocks_in;
2552 register u_int32_t *currentWord; // Pointer to current word within bitmap block
2553 register u_int32_t wordsLeft; // Number of words left in this bitmap block
2554 register u_int32_t bitMask; // Word with given bits already set (ready to OR in)
2555 u_int32_t firstBit; // Bit index within word of first bit to allocate
2556 u_int32_t numBits; // Number of bits in word to allocate
2557 u_int32_t *buffer = NULL;
2558 uintptr_t blockRef;
2559 u_int32_t bitsPerBlock;
2560 u_int32_t wordsPerBlock;
2561 // XXXdbg
2562 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2563
2564 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2565 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
2566
2567 /*
2568 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
2569 * need to be able to free blocks being relocated during hfs_truncatefs.
2570 */
2571 if ((do_validate == true) &&
2572 (startingBlock + numBlocks > vcb->totalBlocks)) {
2573 if (ALLOC_DEBUG) {
2574 panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
2575 }
2576
2577 printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
2578 hfs_mark_volume_inconsistent(vcb);
2579 err = EIO;
2580 goto Exit;
2581 }
2582
2583 //
2584 // Pre-read the bitmap block containing the first word of allocation
2585 //
2586
2587 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2588 if (err != noErr) goto Exit;
2589 // XXXdbg
2590 if (hfsmp->jnl) {
2591 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2592 }
2593
2594 //
2595 // Initialize currentWord, and wordsLeft.
2596 //
2597 {
2598 u_int32_t wordIndexInBlock;
0b4e3aa0
A
2599
2600 bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte;
2601 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 2602
0b4e3aa0 2603 wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
1c79356b 2604 currentWord = buffer + wordIndexInBlock;
0b4e3aa0 2605 wordsLeft = wordsPerBlock - wordIndexInBlock;
1c79356b
A
2606 }
2607
2608 //
2609 // If the first block to free doesn't start on a word
2610 // boundary in the bitmap, then treat that first word
2611 // specially.
2612 //
2613
2614 firstBit = startingBlock % kBitsPerWord;
2615 if (firstBit != 0) {
2616 bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit
2617 numBits = kBitsPerWord - firstBit; // number of remaining bits in this word
2618 if (numBits > numBlocks) {
2619 numBits = numBlocks; // entire allocation is inside this one word
2620 bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last
2621 }
6d2010ae
A
2622 if ((do_validate == true) &&
2623 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
55e303ae 2624 goto Corruption;
1c79356b 2625 }
1c79356b
A
2626 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2627 numBlocks -= numBits; // adjust number of blocks left to free
2628
2629 ++currentWord; // move to next word
2630 --wordsLeft; // one less word left in this block
2631 }
2632
2633 //
0b4e3aa0 2634 // Free whole words (32 blocks) at a time.
1c79356b
A
2635 //
2636
2637 while (numBlocks >= kBitsPerWord) {
2638 if (wordsLeft == 0) {
2639 // Read in the next bitmap block
0b4e3aa0 2640 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
1c79356b 2641
1c79356b 2642 buffer = NULL;
0b4e3aa0 2643 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2644 if (err != noErr) goto Exit;
0b4e3aa0
A
2645
2646 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2647 if (err != noErr) goto Exit;
2648
b4c24cb9
A
2649 // XXXdbg
2650 if (hfsmp->jnl) {
2651 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2652 }
2653
1c79356b
A
2654 // Readjust currentWord and wordsLeft
2655 currentWord = buffer;
0b4e3aa0 2656 wordsLeft = wordsPerBlock;
1c79356b 2657 }
6d2010ae
A
2658 if ((do_validate == true) &&
2659 (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
55e303ae 2660 goto Corruption;
1c79356b 2661 }
1c79356b
A
2662 *currentWord = 0; // clear the entire word
2663 numBlocks -= kBitsPerWord;
2664
2665 ++currentWord; // move to next word
2666 --wordsLeft; // one less word left in this block
2667 }
2668
2669 //
0b4e3aa0 2670 // Free any remaining blocks.
1c79356b
A
2671 //
2672
2673 if (numBlocks != 0) {
2674 bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits
2675 if (wordsLeft == 0) {
2676 // Read in the next bitmap block
0b4e3aa0 2677 startingBlock += bitsPerBlock; // generate a block number in the next bitmap block
1c79356b 2678
1c79356b 2679 buffer = NULL;
0b4e3aa0 2680 err = ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2681 if (err != noErr) goto Exit;
0b4e3aa0
A
2682
2683 err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
2684 if (err != noErr) goto Exit;
2685
b4c24cb9
A
2686 // XXXdbg
2687 if (hfsmp->jnl) {
2688 journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
2689 }
2690
1c79356b
A
2691 // Readjust currentWord and wordsLeft
2692 currentWord = buffer;
0b4e3aa0 2693 wordsLeft = wordsPerBlock;
1c79356b 2694 }
6d2010ae
A
2695 if ((do_validate == true) &&
2696 (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
55e303ae 2697 goto Corruption;
1c79356b 2698 }
1c79356b
A
2699 *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap
2700
2701 // No need to update currentWord or wordsLeft
2702 }
2703
2704Exit:
2705
0b4e3aa0
A
2706 if (buffer)
2707 (void)ReleaseBitmapBlock(vcb, blockRef, true);
1c79356b 2708
6d2010ae 2709 if (err == noErr) {
060df5ea 2710 hfs_unmap_free_extent(vcb, startingBlock_in, numBlocks_in);
593a1d5f
A
2711 }
2712
6d2010ae
A
2713 if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
2714 KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
593a1d5f 2715
1c79356b 2716 return err;
55e303ae
A
2717
2718Corruption:
2719#if DEBUG_BUILD
6d2010ae 2720 panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
55e303ae 2721#else
6d2010ae 2722 printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks (%u,%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
2d21ac55 2723 hfs_mark_volume_inconsistent(vcb);
55e303ae
A
2724 err = EIO;
2725 goto Exit;
2726#endif
1c79356b
A
2727}
2728
6d2010ae
A
2729#if CONFIG_HFS_ALLOC_RBTREE
2730/*
2731 * This is a wrapper function around BlockMarkFreeInternal. This function is
2732 * called when the RB Tree-based allocator needs to mark a block as no longer
2733 * in use. It should acquire any locks that the plain bitmap deallocator would
2734 * not normally need, then call BlockMarkFreeInternal. Once
2735 * the on-disk data structures are updated properly, then this will update an
2736 * existing rb-tree node if possible, or else create a new one.
2737 */
2738
2739OSErr BlockMarkFreeRBTree(
2740 ExtendedVCB *vcb,
2741 u_int32_t startingBlock,
2742 register u_int32_t numBlocks)
2743{
2744 OSErr err;
2745 struct hfsmount *hfsmp = VCBTOHFS(vcb);
2746 int rb_err = 0;
2747
2748 if (ALLOC_DEBUG) {
2749 REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
2750 if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2751 panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n",
2752 startingBlock, numBlocks);
2753 }
2754 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);
2755 }
2756
2757 err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
2758
2759 if (err == noErr) {
2760
2761 /*
2762 * During a filesystem truncation, we may need to relocate files out of the
2763 * portion of the bitmap that is no longer controlled by the r/b tree.
2764 * In this case, just update the bitmap and do not attempt to manipulate the tree.
2765 */
2766 if ((startingBlock >= hfsmp->offset_block_end) &&
2767 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
2768 goto free_error;
2769 }
2770
2771 extent_node_t *newnode;
2772
2773 if (ALLOC_DEBUG) {
2774 /*
2775 * Validate that the blocks in question are not allocated in the bitmap, and that they're
2776 * not in the offset tree, since it should be tracking free extents, rather than allocated
2777 * extents
2778 */
2779 if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
2780 panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n",
2781 startingBlock, numBlocks);
2782 }
2783 }
2784
2785 if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) {
2786 if (startingBlock >= hfsmp->offset_block_end) {
2787 /*
2788 * If the tree generation code has not yet finished scanning the
2789 * bitmap region containing this extent, do nothing. If the start
2790 * of the range to be deallocated is greater than the current high
2791 * watermark on the offset tree, just bail out and let the scanner catch up with us.
2792 */
2793 rb_err = 0;
2794 goto free_error;
2795 }
2796 }
2797
2798 newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock);
2799 if (newnode == NULL) {
2800 rb_err = 1;
2801 goto free_error;
2802 }
2803
2804 if (ALLOC_DEBUG) {
2805 check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
2806 }
2807
2808 }
2809
2810free_error:
2811 /*
2812 * We follow the same principle as in BlockMarkAllocatedRB.
2813 * If we encounter an error in adding the extents to the rb-tree, then immediately
2814 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags
2815 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild.
2816 */
2817 if (rb_err) {
2818 /* Mark RB-Trees with error */
2819 hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
2820 DestroyTrees(hfsmp);
2821 /* Reset the Free Ext Cache since we'll be using it now. */
2822 ResetVCBFreeExtCache(hfsmp);
2823 printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n");
2824 }
2825
2826
2827 return err;
2828
2829}
2830#endif
1c79356b
A
2831
2832/*
2833_______________________________________________________________________
2834
2835Routine: BlockFindContiguous
2836
2837Function: Find a contiguous range of blocks that are free (bits
2838 clear in the bitmap). If a contiguous range of the
2839 minimum size can't be found, an error will be returned.
6d2010ae
A
2840 This is only needed to support the bitmap-scanning logic,
2841 as the red-black tree should be able to do this by internally
2842 searching its tree.
1c79356b
A
2843
2844Inputs:
2845 vcb Pointer to volume where space is to be allocated
2846 startingBlock Preferred first block of range
2847 endingBlock Last possible block in range + 1
2848 minBlocks Minimum number of blocks needed. Must be > 0.
2849 maxBlocks Maximum (ideal) number of blocks desired
55e303ae 2850 useMetaZone OK to dip into metadata allocation zone
1c79356b
A
2851
2852Outputs:
2853 actualStartBlock First block of range found, or 0 if error
2854 actualNumBlocks Number of blocks found, or 0 if error
1c79356b 2855
0b4e3aa0
A
2856Returns:
2857 noErr Found at least minBlocks contiguous
2858 dskFulErr No contiguous space found, or all less than minBlocks
2859_______________________________________________________________________
1c79356b
A
2860*/
2861
2862static OSErr BlockFindContiguous(
2863 ExtendedVCB *vcb,
2d21ac55
A
2864 u_int32_t startingBlock,
2865 u_int32_t endingBlock,
2866 u_int32_t minBlocks,
2867 u_int32_t maxBlocks,
55e303ae 2868 Boolean useMetaZone,
2d21ac55
A
2869 u_int32_t *actualStartBlock,
2870 u_int32_t *actualNumBlocks)
1c79356b
A
2871{
2872 OSErr err;
2d21ac55
A
2873 register u_int32_t currentBlock; // Block we're currently looking at.
2874 u_int32_t firstBlock; // First free block in current extent.
2875 u_int32_t stopBlock; // If we get to this block, stop searching for first free block.
2876 u_int32_t foundBlocks; // Number of contiguous free blocks in current extent.
2877 u_int32_t *buffer = NULL;
2878 register u_int32_t *currentWord;
2879 register u_int32_t bitMask;
2880 register u_int32_t wordsLeft;
2881 register u_int32_t tempWord;
b0d623f7 2882 uintptr_t blockRef;
2d21ac55 2883 u_int32_t wordsPerBlock;
6d2010ae
A
2884 u_int32_t updated_free_extent = 0;
2885
2886 if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
2887 KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
1c79356b 2888
743b1565
A
2889 /*
2890 * When we're skipping the metadata zone and the start/end
2891 * range overlaps with the metadata zone then adjust the
2892 * start to be outside of the metadata zone. If the range
2893 * is entirely inside the metadata zone then we can deny the
2894 * request (dskFulErr).
2895 */
2896 if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
2897 if (startingBlock <= vcb->hfs_metazone_end) {
2898 if (endingBlock > (vcb->hfs_metazone_end + 2))
2899 startingBlock = vcb->hfs_metazone_end + 1;
2900 else
2901 goto DiskFull;
2902 }
4a249263
A
2903 }
2904
0b4e3aa0
A
2905 if ((endingBlock - startingBlock) < minBlocks)
2906 {
1c79356b
A
2907 // The set of blocks we're checking is smaller than the minimum number
2908 // of blocks, so we couldn't possibly find a good range.
0b4e3aa0 2909 goto DiskFull;
1c79356b 2910 }
0b4e3aa0
A
2911
2912 stopBlock = endingBlock - minBlocks + 1;
2913 currentBlock = startingBlock;
9bccf70c 2914 firstBlock = 0;
55e303ae
A
2915
2916 /*
2917 * Skip over metadata blocks.
2918 */
2919 if (!useMetaZone)
2920 currentBlock = NextBitmapBlock(vcb, currentBlock);
2921
1c79356b 2922 //
0b4e3aa0 2923 // Pre-read the first bitmap block.
1c79356b 2924 //
0b4e3aa0
A
2925 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
2926 if ( err != noErr ) goto ErrorExit;
1c79356b 2927
1c79356b 2928 //
0b4e3aa0 2929 // Figure out where currentBlock is within the buffer.
1c79356b 2930 //
0b4e3aa0 2931 wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
1c79356b 2932
55e303ae 2933 wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer
0b4e3aa0
A
2934 currentWord = buffer + wordsLeft;
2935 wordsLeft = wordsPerBlock - wordsLeft;
2936
2937 do
1c79356b 2938 {
0b4e3aa0
A
2939 foundBlocks = 0;
2940
2941 //============================================================
2942 // Look for a free block, skipping over allocated blocks.
2943 //============================================================
2944
2945 //
2946 // Check an initial partial word (if any)
2947 //
2948 bitMask = currentBlock & kBitsWithinWordMask;
2949 if (bitMask)
2950 {
9bccf70c 2951 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
0b4e3aa0
A
2952 bitMask = kHighBitInWordMask >> bitMask;
2953 while (tempWord & bitMask)
1c79356b 2954 {
0b4e3aa0
A
2955 bitMask >>= 1;
2956 ++currentBlock;
1c79356b 2957 }
1c79356b 2958
0b4e3aa0
A
2959 // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)?
2960 if (bitMask)
2961 goto FoundUnused;
1c79356b 2962
0b4e3aa0
A
2963 // Didn't find any unused bits, so we're done with this word.
2964 ++currentWord;
2965 --wordsLeft;
1c79356b 2966 }
1c79356b 2967
0b4e3aa0
A
2968 //
2969 // Check whole words
2970 //
2971 while (currentBlock < stopBlock)
2972 {
2973 // See if it's time to read another block.
2974 if (wordsLeft == 0)
1c79356b 2975 {
1c79356b 2976 buffer = NULL;
0b4e3aa0
A
2977 err = ReleaseBitmapBlock(vcb, blockRef, false);
2978 if (err != noErr) goto ErrorExit;
2979
55e303ae
A
2980 /*
2981 * Skip over metadata blocks.
2982 */
2983 if (!useMetaZone) {
2984 currentBlock = NextBitmapBlock(vcb, currentBlock);
4a249263
A
2985 if (currentBlock >= stopBlock) {
2986 goto LoopExit;
2987 }
55e303ae
A
2988 }
2989
0b4e3aa0
A
2990 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
2991 if ( err != noErr ) goto ErrorExit;
1c79356b 2992
0b4e3aa0
A
2993 currentWord = buffer;
2994 wordsLeft = wordsPerBlock;
1c79356b
A
2995 }
2996
0b4e3aa0 2997 // See if any of the bits are clear
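	// ((tempWord + 1) wraps to zero only when tempWord == 0xFFFFFFFF,
	// i.e. when every block in this word is allocated)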
9bccf70c 2998 if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear
1c79356b 2999 {
0b4e3aa0
A
3000 // Figure out which bit is clear
3001 bitMask = kHighBitInWordMask;
3002 while (tempWord & bitMask)
3003 {
3004 bitMask >>= 1;
3005 ++currentBlock;
3006 }
3007
3008 break; // Found the free bit; break out to FoundUnused.
1c79356b 3009 }
1c79356b 3010
0b4e3aa0
A
3011 // Keep looking at the next word
3012 currentBlock += kBitsPerWord;
3013 ++currentWord;
3014 --wordsLeft;
3015 }
1c79356b 3016
0b4e3aa0
A
3017FoundUnused:
3018 // Make sure the unused bit is early enough to use
3019 if (currentBlock >= stopBlock)
1c79356b 3020 {
0b4e3aa0 3021 break;
1c79356b 3022 }
1c79356b 3023
0b4e3aa0
A
3024 // Remember the start of the extent
3025 firstBlock = currentBlock;
1c79356b 3026
0b4e3aa0
A
3027 //============================================================
3028 // Count the number of contiguous free blocks.
3029 //============================================================
1c79356b 3030
0b4e3aa0
A
3031 //
3032 // Check an initial partial word (if any)
3033 //
3034 bitMask = currentBlock & kBitsWithinWordMask;
3035 if (bitMask)
1c79356b 3036 {
9bccf70c 3037 tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once
0b4e3aa0
A
3038 bitMask = kHighBitInWordMask >> bitMask;
3039 while (bitMask && !(tempWord & bitMask))
1c79356b 3040 {
0b4e3aa0
A
3041 bitMask >>= 1;
3042 ++currentBlock;
1c79356b 3043 }
0b4e3aa0
A
3044
3045 // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)?
3046 if (bitMask)
3047 goto FoundUsed;
3048
3049 // Didn't find any used bits, so we're done with this word.
3050 ++currentWord;
3051 --wordsLeft;
3052 }
3053
3054 //
3055 // Check whole words
3056 //
3057 while (currentBlock < endingBlock)
3058 {
3059 // See if it's time to read another block.
3060 if (wordsLeft == 0)
1c79356b 3061 {
0b4e3aa0
A
3062 buffer = NULL;
3063 err = ReleaseBitmapBlock(vcb, blockRef, false);
3064 if (err != noErr) goto ErrorExit;
3065
55e303ae
A
3066 /*
3067 * Skip over metadata blocks.
3068 */
3069 if (!useMetaZone) {
2d21ac55 3070 u_int32_t nextBlock;
55e303ae
A
3071
3072 nextBlock = NextBitmapBlock(vcb, currentBlock);
3073 if (nextBlock != currentBlock) {
4a249263 3074 goto LoopExit; /* allocation gap, so stop */
55e303ae
A
3075 }
3076 }
3077
0b4e3aa0
A
3078 err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
3079 if ( err != noErr ) goto ErrorExit;
3080
3081 currentWord = buffer;
3082 wordsLeft = wordsPerBlock;
1c79356b 3083 }
0b4e3aa0
A
3084
3085 // See if any of the bits are set
9bccf70c 3086 if ((tempWord = SWAP_BE32(*currentWord)) != 0)
1c79356b 3087 {
0b4e3aa0 3088 // Figure out which bit is set
1c79356b 3089 bitMask = kHighBitInWordMask;
0b4e3aa0 3090 while (!(tempWord & bitMask))
1c79356b 3091 {
0b4e3aa0
A
3092 bitMask >>= 1;
3093 ++currentBlock;
1c79356b 3094 }
0b4e3aa0
A
3095
3096 break; // Found the used bit; break out to FoundUsed.
1c79356b 3097 }
0b4e3aa0
A
3098
3099 // Keep looking at the next word
3100 currentBlock += kBitsPerWord;
3101 ++currentWord;
3102 --wordsLeft;
3103
3104 // If we found at least maxBlocks, we can quit early.
3105 if ((currentBlock - firstBlock) >= maxBlocks)
3106 break;
1c79356b 3107 }
1c79356b 3108
0b4e3aa0
A
3109FoundUsed:
3110 // Make sure we didn't run out of bitmap looking for a used block.
3111 // If so, pin to the end of the bitmap.
3112 if (currentBlock > endingBlock)
3113 currentBlock = endingBlock;
3114
3115 // Figure out how many contiguous free blocks there were.
3116 // Pin the answer to maxBlocks.
3117 foundBlocks = currentBlock - firstBlock;
3118 if (foundBlocks > maxBlocks)
3119 foundBlocks = maxBlocks;
3120 if (foundBlocks >= minBlocks)
3121 break; // Found what we needed!
3122
6d2010ae
A
3123 /* We did not find the total number of blocks we were looking for, but
3124 * let's add this free block run to our free extent cache list
3125 */
3126 updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
b0d623f7 3127
0b4e3aa0 3128 } while (currentBlock < stopBlock);
LoopExit:

	//	Return the outputs.
	if (foundBlocks < minBlocks)
	{
DiskFull:
		err = dskFulErr;
ErrorExit:
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	else
	{
		err = noErr;
		*actualStartBlock = firstBlock;
		*actualNumBlocks = foundBlocks;
		/*
		 * Sanity check for overflow
		 */
		if ((firstBlock + foundBlocks) > vcb->allocLimit) {
			panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
				vcb->vcbVN, startingBlock, endingBlock, currentBlock,
				firstBlock, stopBlock, minBlocks, foundBlocks);
		}
	}

	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
		int i;
		u_int32_t min_start = vcb->totalBlocks;

		// Set the nextAllocation pointer to the smallest free block number
		// we've seen, so that on the next mount we won't rescan unnecessarily.
		lck_spin_lock(&vcb->vcbFreeExtLock);
		for (i = 0; i < (int)vcb->vcbFreeExtCnt; i++) {
			if (vcb->vcbFreeExt[i].startBlock < min_start) {
				min_start = vcb->vcbFreeExt[i].startBlock;
			}
		}
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		if (min_start != vcb->totalBlocks) {
			if (min_start < vcb->nextAllocation) {
				vcb->nextAllocation = min_start;
			}
			if (min_start < vcb->sparseAllocation) {
				vcb->sparseAllocation = min_start;
			}
		}
	}

	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
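
/*
 * Illustrative sketch (not part of the allocator): the scan above slides a
 * single-bit mask across each big-endian bitmap word to find the first used
 * bit.  A minimal user-space rendition of that inner loop might look like the
 * function below; the name first_used_bit_offset is hypothetical, and the
 * 0x80000000u literal stands in for kHighBitInWordMask.  It assumes the word
 * has already been swapped to host order, as SWAP_BE32 does above.
 */
#if 0	/* example only */
static u_int32_t
first_used_bit_offset(u_int32_t word)
{
	u_int32_t bitMask = 0x80000000u;	/* kHighBitInWordMask */
	u_int32_t offset = 0;

	/* Slide the mask right until it lands on a set bit or falls off. */
	while (bitMask && !(word & bitMask)) {
		bitMask >>= 1;
		++offset;
	}
	return offset;	/* 32 means the word was entirely free (zero) */
}
#endif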


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Wrapper function around hfs_isrbtree_allocated.  This just takes the start offset,
 * the number of blocks, and whether the caller expects the blocks to be free.
 * This function is designed to be used primarily with the debug #ifdef
 * enabled, so it results in a panic if anything unexpected occurs.
 *
 * shouldBeFree will be nonzero if the caller expects the zone to be free.
 */

void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks,
		u_int32_t numBlocks, int shouldBeFree) {
	int alloc;
	extent_node_t *node1 = NULL;
	u_int32_t off1 = 0;
	u_int32_t len1 = 0;
	alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1);

	if (node1) {
		off1 = node1->offset;
		len1 = node1->length;
	}

	if (shouldBeFree) {
		/*
		 * If the region should be free, then we expect to see extents in the tree
		 * matching this start and length.  Alloc != 0 means some portion of the extent
		 * specified was allocated.
		 */
		if (alloc != 0){
			panic ("HFS check_rbtree_extents: Node (%p) does not exist! "
				   "node1 off (%d), len(%d), start(%d) end(%d)\n",
				   node1, off1, len1, startBlocks, numBlocks);
		}
	}
	else {
		/*
		 * Otherwise, this means that the region should be allocated, and if we find
		 * an extent matching it, that's bad.
		 */
		if (alloc == 0){
			panic ("HFS check_rbtree_extents: Node (%p) exists! "
				   "node1 off (%d), len(%d), start(%d) end(%d)\n",
				   node1, off1, len1, startBlocks, numBlocks);
		}
	}
}
#endif

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Exhaustive validation search.  This function iterates over all allocation blocks and
 * compares their status in the red-black tree vs. the allocation bitmap.  If the two are out of sync
 * then it will panic.  The bitmap lock must be held while this function is run.
 *
 * Because this function requires a red-black tree search to validate every allocation block, it is
 * very expensive and should ONLY be run in debug mode, and even then, infrequently.
 *
 * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
 */
void
hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){

	u_int32_t current;
	extent_node_t* node1;

	hfs_checktreelinks (hfsmp);

	for (current = start; current < end; current++) {
		node1 = NULL;
		int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
		int bitmap = hfs_isallocated(hfsmp, current, 1);

		if (bitmap != rbtree){
			panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n",
				  current, bitmap, rbtree);
		}
	}
}

/*
 * Exhaustive Red-Black Tree Linked List verification routine.
 *
 * This function iterates through the red-black tree's nodes, and then verifies that the linked list
 * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
 * The bitmap lock must be held while this function is run.
 */

void
hfs_checktreelinks (struct hfsmount *hfsmp) {
	extent_tree_offset_t *tree = &hfsmp->offset_tree;

	extent_node_t *current = NULL;
	extent_node_t *next = NULL;
	extent_node_t *treenext;

	current = extent_tree_off_first (tree);

	while (current) {
		next = current->offset_next;
		treenext = extent_tree_off_next (tree, current);
		if (next != treenext) {
			panic("hfs_checktreelinks: mismatch for node (%p), next: %p , treenext %p !\n", current, next, treenext);
		}
		current = treenext;
	}
}
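
/*
 * Illustrative failure case (hypothetical values): if the tree iterator
 * returns nodes at offsets 10, 50, 90, but the node at offset 10 has
 * offset_next pointing at the node for offset 90, then the embedded list
 * has skipped an extent, and the panic above fires with next != treenext.
 */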

#endif


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Test to see if any free blocks exist at a given offset.
 * If there exists a node at the specified offset, it will return the appropriate
 * node.
 *
 * NULL indicates allocated blocks exist at that offset.
 *
 * Allocation file lock must be held.
 *
 * Returns:
 *	1 if blocks in the range are allocated.
 *	0 if all blocks in the range are free.
 */

static int
hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, extent_node_t **ret_node) {

	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t *nextnode = NULL;

	/*
	 * With only one tree, we just have to validate that there are entries
	 * in the R/B tree at the specified offset if it really is free.
	 */
	search_sentinel.offset = startBlock;
	search_sentinel.length = numBlocks;

	node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
	if (node) {

		*ret_node = node;
		nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
		if (nextnode != node->offset_next) {
			panic ("hfs_rbtree_isallocated: Next pointers out of sync!\n");
		}

		/*
		 * Check to see if it is a superset of our target range.  Because we started
		 * with the offset or some offset prior to it, we know the node's offset is
		 * at most startBlock.  So, if the end of the node is greater than the end of
		 * our target range, then the whole range is free.
		 */

		if ((node->offset + node->length) >= (startBlock + numBlocks)) {
			if (node->offset > startBlock) {
				panic ("hfs_rbtree_isallocated: bad node ordering!");
			}
			return 0;
		}
	}
	/*
	 * We got here if either our node search resulted in a node whose extent
	 * was strictly before our target offset, or we couldn't find a previous node
	 * at all (the beginning of the volume).  If the former, then we can infer that
	 * at least one block in the target range is allocated, since the next node's offset
	 * must be greater than startBlock.
	 *
	 * Either way, this means that the target range is unavailable to allocate, so
	 * just return 1.
	 */
	return 1;
}
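
/*
 * Worked example of the superset check above (hypothetical values): if the
 * tree holds a free-extent node {offset = 10, length = 20}, a query for
 * startBlock = 12, numBlocks = 5 finds that node via the "search previous"
 * lookup, and since 10 + 20 >= 12 + 5 the whole target range lies inside a
 * single free extent, so the function reports it as free (returns 0).  A
 * query for startBlock = 12, numBlocks = 32 fails the check (30 < 44), so
 * at least one block past the node's end must be allocated, and it returns 1.
 */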


#endif

/*
 * Count number of bits set in the given 32-bit unsigned number
 *
 * Returns:
 * 	Number of bits set
 */
static int num_bits_set(u_int32_t num)
{
	int count;

	/* Each iteration clears the lowest set bit (Kernighan's method),
	 * so the loop runs once per set bit rather than once per position. */
	for (count = 0; num; count++) {
		num &= num - 1;
	}

	return count;
}
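
/*
 * Worked example (illustrative only): for num = 0xF0000001, the loop clears
 * 0x00000001, then 0x10000000, 0x20000000, 0x40000000, and 0x80000000, so it
 * terminates after 5 iterations and returns 5.
 */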

/*
 * For a given range of blocks, find the total number of blocks
 * allocated.  If 'stop_on_first' is true, it stops as soon as it
 * encounters the first allocated block.  This option is useful
 * to determine if any block is allocated or not.
 *
 * Inputs:
 * 	startingBlock	First allocation block number of the range to be scanned.
 * 	numBlocks	Total number of blocks that need to be scanned.
 * 	stop_on_first	Stop the search after the first allocated block is found.
 *
 * Output:
 * 	allocCount	Total number of allocation blocks allocated in the given range.
 *
 * 			On error, it is the number of allocated blocks found
 * 			before the function got an error.
 *
 * 			If 'stop_on_first' is set,
 * 				allocCount = 1 if any allocated block was found.
 * 				allocCount = 0 if no allocated block was found.
 *
 * Returns:
 * 	0 on success, non-zero on failure.
 */
static int
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock,
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t *currentWord;	// Pointer to current word within bitmap block
	u_int32_t wordsLeft;	// Number of words left in this bitmap block
	u_int32_t bitMask;	// Word with given bits already set (ready to test)
	u_int32_t firstBit;	// Bit index within word of first bit to test
	u_int32_t numBits;	// Number of bits in word to test
	u_int32_t *buffer = NULL;
	uintptr_t blockRef;
	u_int32_t bitsPerBlock;
	u_int32_t wordsPerBlock;
	u_int32_t blockCount = 0;
	int error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;

		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;
		numBits = kBitsPerWord - firstBit;
		if (numBits > numBlocks) {
			numBits = numBlocks;
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}
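
	/*
	 * Worked example of the mask construction above (hypothetical values):
	 * with firstBit = 5 and numBlocks = 3, numBits becomes 3 and the mask is
	 * (kAllBitsSetInWord >> 5) & ~(kAllBitsSetInWord >> 8), i.e.
	 * 0x07FFFFFF & ~0x00FFFFFF = 0x07000000: exactly bits 5..7 counting from
	 * the most significant bit, which are the three blocks being tested.
	 */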

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;

			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
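
	/*
	 * Worked example of the tail mask above (hypothetical values): for
	 * numBlocks = 3, bitMask = ~(kAllBitsSetInWord >> 3) = 0xE0000000,
	 * selecting only the three most significant bits, i.e. the first three
	 * blocks covered by this word.
	 */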
Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}

/*
 * Count total number of blocks that are allocated in the given
 * range from the bitmap.  This is used to preflight total blocks
 * that need to be relocated during volume resize.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns:
 * 	0 on success, non-zero on failure.
 * 	On failure, allocCount is zero.
 */
int
hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, u_int32_t *allocCount)
{
	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
}

/*
 * Test to see if any blocks in a range are allocated.
 *
 * Note:  On error, this function returns 1, which means that
 * one or more blocks in the range are allocated.  This function
 * is primarily used for volume resize, and we do not want
 * to report to the caller that the blocks are free when we
 * were not able to deterministically find that out.  So on error,
 * we always report that the blocks are allocated.
 *
 * The journal or allocation file lock must be held.
 *
 * Returns
 *	0 if all blocks in the range are free.
 *	1 if blocks in the range are allocated, or there was an error.
 */
int
hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	int error;
	u_int32_t allocCount;

	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
	if (error) {
		/* On error, we always say that the blocks are allocated
		 * so that volume resize does not return false success.
		 */
		return 1;
	} else {
		/* The function was deterministically able to find out
		 * if there was any block allocated or not.  In that case,
		 * the value in allocCount is good enough to be returned
		 * back to the caller.
		 */
		return allocCount;
	}
}
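
/*
 * Usage sketch (illustrative only, not called anywhere in this file): a
 * resize preflight might first ask whether a candidate region is entirely
 * free, and only fall back to a full count when it is not.  The function
 * name and variables here (example_preflight_region, region_start,
 * region_len) are hypothetical.
 */
#if 0	/* example only */
static int
example_preflight_region(struct hfsmount *hfsmp, u_int32_t region_start,
		u_int32_t region_len, u_int32_t *blocks_to_relocate)
{
	/* Cheap check: stops at the first in-use block. */
	if (hfs_isallocated(hfsmp, region_start, region_len) == 0) {
		*blocks_to_relocate = 0;
		return 0;
	}
	/* Full count: walks the whole range in the bitmap. */
	return hfs_count_allocated(hfsmp, region_start, region_len,
			blocks_to_relocate);
}
#endif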

/*
 * Check to see if the red-black tree is live.  Allocation file lock must be held
 * shared or exclusive to call this function.  Note that we may call this even if
 * HFS is built without activating the red-black tree code.
 */
__private_extern__
int
hfs_isrbtree_active(struct hfsmount *hfsmp){

	//TODO: Update this function to deal with a truncate/resize coming in when the tree
	//isn't fully finished.  Maybe we need to check the flags for something other than ENABLED?

#if CONFIG_HFS_ALLOC_RBTREE
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
	}
	if (hfsmp){

		if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
			return 1;
		}
	}
#else
	#pragma unused (hfsmp)
#endif
	/* If the RB Tree code is not enabled, then just always return 0 */
	return 0;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This function is basically the same as hfs_isallocated, except it's designed for
 * use with the red-black tree validation code.  It assumes we're only checking whether
 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
 *
 * This should not be called in general purpose scanning code.
 */
int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {

	u_int32_t *currentWord;	// Pointer to current word within bitmap block
	u_int32_t bitMask;	// Word with given bits already set (ready to test)
	u_int32_t firstBit;	// Bit index within word of first bit to test
	u_int32_t numBits;	// Number of bits in word to test
	u_int32_t bitsPerBlock;
	uintptr_t blockRef;
	u_int32_t wordsPerBlock;
	u_int32_t numBlocks = 1;
	u_int32_t *buffer = NULL;

	int inuse = 0;
	int error;

	if (bp_buf) {
		/* just use passed-in buffer if available */
		buffer = bp_buf;
	}
	else {
		/*
		 * Pre-read the bitmap block containing the first word of allocation
		 */
		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
		if (error)
			return (error);
	}

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	u_int32_t wordIndexInBlock;

	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
	currentWord = buffer + wordIndexInBlock;

	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	bitMask = kAllBitsSetInWord >> firstBit;
	numBits = kBitsPerWord - firstBit;
	if (numBits > numBlocks) {
		numBits = numBlocks;
		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
	}
	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
		inuse = 1;
		goto Exit;
	}
	numBlocks -= numBits;
	++currentWord;

Exit:
	if (bp_buf == NULL) {
		if (buffer) {
			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
		}
	}
	return (inuse);
}
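
/*
 * Note on the math above: since numBlocks is fixed at 1, the mask always
 * reduces to a single bit.  For example (hypothetical values), firstBit = 5
 * gives bitMask = (0xFFFFFFFF >> 5) & ~(0xFFFFFFFF >> 6) = 0x04000000, the
 * lone bit for the block being tested, so the whole-word and tail loops of
 * hfs_isallocated_internal are never needed here.
 */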

/*
 * This function scans the specified bitmap block and adds its free extents to
 * the offset tree in the mount point.  We break this behavior out of GenerateTree
 * so that an allocating thread can invoke this if the tree does not have enough
 * extents to satisfy an allocation request.
 *
 * startbit  - the allocation block, represented by a bit in the bitmap block, where
 *             we need to start our scan.  For instance, we may need to start the normal
 *             allocation scan in the middle of an existing allocation block.
 * endBit    - the allocation block where we should end this search (non-inclusive).
 * bitToScan - output argument for this function to specify the next bit to scan.
 *
 * Returns:
 * 	0 on success
 * 	nonzero on failure.
 */

static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit,
		u_int32_t endBit, u_int32_t *bitToScan) {

	int error;
	u_int32_t curAllocBlock;
	struct buf *blockRef = NULL;
	u_int32_t *buffer = NULL;
	u_int32_t wordIndexInBlock;
	u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize;
	u_int32_t wordsPerBlock = blockSize / kBytesPerWord;
	u_int32_t offset = 0;
	u_int32_t size = 0;

	/*
	 * Read the appropriate block from the bitmap file.  ReadBitmapBlock
	 * figures out which actual on-disk block corresponds to the bit we're
	 * looking at.
	 */
	error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef);
	if (error) {
		return error;
	}

	/* curAllocBlock represents the logical block we're analyzing. */
	curAllocBlock = startbit;

	/* Figure out which word curAllocBlock corresponds to in the block we read */
	wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;

	/* Scan a word at a time */
	while (wordIndexInBlock < wordsPerBlock) {
		u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
		u_int32_t curBit;

		/* modulate curBit because it may start in the middle of a word */
		for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {

			u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
			if (ALLOC_DEBUG) {
				u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer);
				if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) {
					panic("hfs_alloc_scan: curAllocBlock %u, curBit (%d), word (0x%x), is_allocated (0x%x) res(0x%x) \n",
						  curAllocBlock, curBit, currentWord, is_allocated, res);
				}
			}
			/*
			 * If curBit is not allocated, keep track of the start of the free range.
			 * Increment a running tally of how many free blocks in a row we've seen.
			 */
			if (!is_allocated) {
				size++;
				if (offset == 0) {
					offset = curAllocBlock;
				}
			}
			else {
				/*
				 * If we hit an allocated block, insert the extent that tracked the range
				 * we saw, and reset our tally counter.
				 */
				if (size != 0) {
					extent_tree_free_space(&hfsmp->offset_tree, size, offset);
					size = 0;
					offset = 0;
				}
			}
			curAllocBlock++;
			/*
			 * Exit early if the next bit we'd analyze would take us beyond the end of the
			 * range that we're supposed to scan.
			 */
			if (curAllocBlock >= endBit) {
				goto DoneScanning;
			}
		}
		wordIndexInBlock++;
	}
DoneScanning:

	/* We may have been tracking a range of free blocks that hasn't been inserted yet. */
	if (size != 0) {
		extent_tree_free_space(&hfsmp->offset_tree, size, offset);
	}
	/*
	 * curAllocBlock represents the next block we need to scan while we're in this
	 * function.
	 */
	*bitToScan = curAllocBlock;

	ReleaseRBScanBitmapBlock(blockRef);

	return 0;
}
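
/*
 * Worked trace (illustrative only, hypothetical values): scanning a word
 * whose big-endian value is 0xF00FFFFF starting at curAllocBlock 32 finds
 * bits 4..11 clear, so the loop accumulates offset = 36, size = 8, and the
 * run is inserted into the offset tree as soon as the set bit at position 12
 * is hit.  A run that reaches the end of the scan range instead gets flushed
 * by the size != 0 check after DoneScanning.
 */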

/*
 * Extern function that is called from mount and upgrade mount routines
 * that enable us to initialize the tree.
 */

__private_extern__
u_int32_t InitTree(struct hfsmount *hfsmp) {
	extent_tree_init (&(hfsmp->offset_tree));
	return 0;
}


/*
 * This function builds the trees specified in its arguments.  It uses
 * buf_meta_bread to scan through the bitmap and re-build the tree state.
 * It is very important to use buf_meta_bread because we need to ensure that we
 * read the most current version of the blocks that we're scanning.  If we used
 * cluster_io, then journaled transactions could still be sitting in RAM since they are
 * written to disk in the proper location asynchronously.
 *
 * Because this could still be running when mount has finished, we need to check
 * after every allocation block that we're working on if an unmount or some other
 * operation that would cause us to tear down has come in (think downgrade mount).
 * If an unmount has come in, then abort whatever we're doing and return -1
 * to indicate we hit an error.  If we don't do this, we'd hold up unmount for
 * a very long time.
 *
 * This function assumes that the bitmap lock is acquired exclusively before being
 * called.  It will drop the lock and then re-acquire it during operation, but
 * will always return with the lock held.
 */
__private_extern__
u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {

	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);

	u_int32_t *cur_block_eof;
	int error = 0;

	int USE_FINE_GRAINED_LOCKING = 0;

	/* Initialize the block counter while we hold the bitmap lock */
	cur_block_eof = &hfsmp->offset_block_end;

	/*
	 * This loop advances over all allocation bitmap blocks of the current region
	 * to scan them and add the results into the red-black tree.  We use the mount point
	 * variable offset_block_end as our loop counter.  This gives us flexibility
	 * because we can release the allocation bitmap lock and allow a thread that wants
	 * to make an allocation to grab the lock and do some scanning on our behalf while we're
	 * waiting to re-acquire the lock.  Then, the allocating thread will only do as much bitmap
	 * scanning as needed to fulfill its allocation.
	 *
	 * If the other thread does IO for us, then it will update the offset_block_end
	 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
	 * scanning.  So when we re-grab the lock, our current EOF/loop will immediately skip us to the next
	 * block that needs scanning.
	 */

	while (*cur_block_eof < endBlock) {

		/*
		 * If the filesystem is being resized before the bitmap has been fully scanned, we'll
		 * update our endBlock to match the current allocation limit in the hfsmp struct.
		 * The allocLimit field would only be updated while holding the bitmap lock, so we won't
		 * be executing this code at the same time that the resize is going on.
		 */
		if ((initialscan) && (endBlock != hfsmp->allocLimit)) {

			/* If we're past the new/modified allocLimit, then just stop immediately.*/
			if (*cur_block_eof >= hfsmp->allocLimit ) {
				break;
			}
			endBlock = hfsmp->allocLimit;
		}

		/*
		 * TODO: fix unmount stuff!
		 * See rdar://7391404
		 *
		 * Once the RB allocator is checked in, we'll want to augment it to not hold the
		 * allocation bitmap lock for the entire duration of the tree scan.  For a first check-in
		 * it's ok to do that but we can't leave it like that forever.
		 *
		 * The gist of the new algorithm will work as follows:
		 * if an unmount is in flight and has been detected:
		 *	abort tree-build.
		 *	unset tree-in-progress bit.
		 *	wakeup unmount thread
		 *	unlock allocation bitmap lock, fail out.
		 *
		 * The corresponding code in the unmount side should already be in place.
		 */

		error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof);

		//TODO: Fix this below!
		if (USE_FINE_GRAINED_LOCKING){
			hfs_systemfile_unlock(hfsmp, *flags);
			*flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		}
		//TODO: Infer that if *flags == 0, we don't actually need to lock/unlock.
	}

	return error;
}

/*
 * This function destroys the rb-trees associated with the mount point.
 */
__private_extern__
void DestroyTrees(struct hfsmount *hfsmp) {

	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
		printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
		hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end );
	}

	/*
	 * extent_tree_destroy will start with the first entry in the tree (by offset), then
	 * iterate through the tree quickly using its embedded linked list.  This results in tree
	 * destruction in O(n) time.
	 */

	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
		extent_tree_destroy(&hfsmp->offset_tree);

		/* Mark Trees as disabled */
		hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;
	}

	return;
}

#endif

/*
 * This function resets all of the data structures relevant to the
 * free extent cache stored in the hfsmount struct.
 *
 * If we are using the red-black tree code then we need to account for the fact that
 * we may encounter situations where we need to jettison the tree.  If that is the
 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that
 * the free ext cache is zeroed before we start using it.
 *
 * We also reset and disable the cache when allocLimit is updated... which
 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()).
 * It is independent of the type of allocator being used currently.
 */
void ResetVCBFreeExtCache(struct hfsmount *hfsmp)
{
	int bytes;
	void *freeExt;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* reset Free Extent Count */
	hfsmp->vcbFreeExtCnt = 0;

	/* reset the actual array */
	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
	freeExt = (void*)(hfsmp->vcbFreeExt);

	bzero (freeExt, bytes);

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return;
}

/*
 * This function is used to inform the allocator if we have to effectively shrink
 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs.
 *
 * The bitmap lock must be held when calling this function.  This function also modifies the
 * allocLimit field in the hfs mount point structure in the general case.
 *
 * In the shrinking case, we'll have to remove all free extents from the red-black
 * tree past the specified offset new_end_block.  In the growth case, we'll have to force
 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
 *
 * new_end_block represents the total number of blocks available for allocation in the resized
 * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
 * will be out of the (0, n-1) range that is indexable in the bitmap.
 *
 * Returns	0 on success
 *		errno on failure
 */
__private_extern__
u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {

	/*
	 * Update allocLimit to the argument specified, but don't do anything else
	 * if the red/black tree is not enabled.
	 */
	hfsmp->allocLimit = new_end_block;

	/* Invalidate the free extent cache completely so that
	 * it does not have any extents beyond end of current
	 * volume.
	 */
	ResetVCBFreeExtCache(hfsmp);

#if CONFIG_HFS_ALLOC_RBTREE
	/* Shrinking the existing filesystem */
	if ((new_end_block < hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		extent_node_t search_sentinel;
		extent_node_t *node = NULL;
		/* Remover points to the current item to free/remove from the tree */
		extent_node_t *remover = NULL;

		/* Begin search at the specified offset */
		memset (&search_sentinel, 0, sizeof(extent_node_t));
		search_sentinel.offset = new_end_block;

		/*
		 * Find the first available extent that satisfies the allocation by searching
		 * from the starting point or 1 earlier.  We may need to split apart an existing node
		 * if it straddles the new alloc limit.
		 */
		node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
		if (node) {
			/* If it's an exact match, then just remove them all from this point forward */
			if (node->offset == new_end_block) {
				/*
				 * Find the previous entry and update its next pointer to NULL
				 * since this entry is biting the dust.  Update remover to node.
				 */
				extent_node_t *prev = NULL;
				prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
				if (prev) {
					prev->offset_next = NULL;
				}
				remover = node;
			}
			else {
				/* See if we need to split this node */
				if ((node->offset + node->length) > new_end_block) {
					/*
					 * Update node to reflect its new size up until new_end_block.
					 */
					remover = node->offset_next;
					node->length = new_end_block - node->offset;
					/* node is becoming the last free extent in the volume. */
					node->offset_next = NULL;
				}
				else {
					if (node->offset_next == NULL) {
						/*
						 * 'node' points to the last free extent in the volume.
						 * Coincidentally, it is also before the new cut-off point at which
						 * we will stop representing bitmap values in the tree.  Just bail out now.
						 */
						return 0;
					}
					/*
					 * Otherwise, point our temp variable 'remover' to the node where
					 * we'll need to start yanking things out of the tree, and make 'node'
					 * the last element in the tree in the linked list.
					 */
					remover = node->offset_next;
					if (remover->offset <= new_end_block) {
						panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
					}
					node->offset_next = NULL;
				}
			}

			/*
			 * Remover is our "temp" pointer that points to the current node to remove from
			 * the offset tree.  We'll simply iterate through the tree's linked list, removing
			 * each element from the tree and freeing it as we come across it.
			 */
			while (remover) {
				extent_node_t *next = remover->offset_next;
				extent_tree_remove_node (&hfsmp->offset_tree, remover);
				free_node (remover);
				remover = next;
			}

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block-1);
			}

			/*
			 * Don't forget to shrink offset_block_end after a successful truncation.
			 * new_end_block should represent the number of blocks available on the
			 * truncated volume.
			 */

			hfsmp->offset_block_end = new_end_block;

			return 0;
		}
		else {
			if (ALLOC_DEBUG) {
				panic ("UpdateAllocLimit: no prev!");
			}
			return ENOSPC;
		}
	}
	/* Growing the existing filesystem */
	else if ((new_end_block > hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {
		int flags = 0;
		int retval = 0;

		if (ALLOC_DEBUG) {
			printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
			hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
		}


		retval = GenerateTree (hfsmp, new_end_block, &flags, 0);

		/*
		 * Don't forget to update offset_block_end after a successful tree extension.
		 */
		if (retval == 0) {

			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after growth\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block);
			}

			hfsmp->offset_block_end = new_end_block;
		}

		return retval;
	}
	/* Otherwise, do nothing.  Fall through to the code below. */
	printf ("error : off_block_end: %d, alloclimit: %d, new_end_block: %d\n",
		hfsmp->offset_block_end,hfsmp->allocLimit, new_end_block);
#endif

	return 0;

}
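
/*
 * Worked example of the shrink path above (hypothetical values): with a free
 * extent node {offset = 90, length = 30} in the tree and new_end_block = 100,
 * the node straddles the new limit, so its length is trimmed to
 * 100 - 90 = 10, and every node after it in the embedded linked list is
 * removed from the tree and freed; offset_block_end then becomes 100.
 */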


/*
 * Remove an entry from the free extent cache after it has been allocated.
 *
 * This function does not split extents; any cached extent that overlaps
 * the allocated range is removed entirely.
 *
 * Inputs:
 * 	hfsmp		- mount point structure
 * 	startBlock	- starting block of the extent to be removed.
 * 	blockCount	- number of blocks of the extent to be removed.
 */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	int i, j;
	int extentsRemoved = 0;
	u_int32_t start, end;

#if CONFIG_HFS_ALLOC_RBTREE
	/* If the red-black tree is enabled, no free extent cache is necessary */
	if (hfs_isrbtree_active(hfsmp) == true) {
		return;
	}
#endif

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
		start = hfsmp->vcbFreeExt[i].startBlock;
		end = start + hfsmp->vcbFreeExt[i].blockCount;

		/* If the extent to remove from the free extent list starts within
		 * this free extent, or, if it starts before this free extent
		 * but ends in this free extent, remove it by shifting all other
		 * extents.
		 */
		if (((startBlock >= start) && (startBlock < end)) ||
		    ((startBlock < start) && (startBlock + blockCount) > start)) {
			for (j = i; j < (int)hfsmp->vcbFreeExtCnt - 1; j++) {
				hfsmp->vcbFreeExt[j] = hfsmp->vcbFreeExt[j+1];
			}
			hfsmp->vcbFreeExtCnt--;
			/* Decrement the index so that we check the extent
			 * that just got shifted to the current index.
			 */
			i--;
			extentsRemoved++;
		}
		/* Continue looping as we might have to invalidate multiple extents;
		 * probably not possible in the normal case, but it does not hurt.
		 */
	}

	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);

	return;
}

/*
 * Add an entry to the free extent cache after it has been deallocated.
 *
 * If the extent provided has blocks beyond the current allocLimit, it
 * is clipped to allocLimit.  This function does not merge contiguous
 * extents, if they already exist in the list.
 *
 * Inputs:
 * 	hfsmp		- mount point structure
 * 	startBlock	- starting block of the extent to be added.
 * 	blockCount	- number of blocks of the extent to be added.
 *
 * Returns:
 * 	true  - if the extent was added successfully to the list
 * 	false - if the extent was not added to the list, maybe because
 * 		the extent was beyond allocLimit, or is not the best
 * 		candidate to be put in the cache.
 */
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	Boolean retval = false;
	u_int32_t start, end;
	int i;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	/*
	 * If using the red-black tree allocator, then there's no need to special case
	 * for the sparse device case.  We'll simply add the region we've recently freed
	 * to the red-black tree, where it will get sorted by offset and length.  The only special
	 * casing will need to be done on the allocation side, where we may favor free extents
	 * based on offset even if it will cause fragmentation.  This may be true, for example, if
	 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image.
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp) == true) {
		goto out_not_locked;
	}
#endif

	/* No need to add an extent that is beyond the current allocLimit */
	if (startBlock >= hfsmp->allocLimit) {
		goto out_not_locked;
	}

	/* If the end of the free extent is beyond the current allocLimit, clip the extent */
	if ((startBlock + blockCount) > hfsmp->allocLimit) {
		blockCount = hfsmp->allocLimit - startBlock;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* If the free extent cache is full and the new extent fails to
	 * compare with the last extent, skip adding it to the list.
	 */
	if (hfsmp->vcbFreeExtCnt == kMaxFreeExtents) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* For sparse disks, the free extent cache list is sorted by start block, lowest first */
			if (startBlock > hfsmp->vcbFreeExt[kMaxFreeExtents-1].startBlock) {
				goto out;
			}
		} else {
			/* For normal mounts, the free extent cache list is sorted by total blocks, highest first */
			if (blockCount <= hfsmp->vcbFreeExt[kMaxFreeExtents-1].blockCount) {
				goto out;
			}
		}
	}

	/* Check if the current extent overlaps with any of the existing
	 * extents.  If yes, just skip adding it to the list.  We have
	 * to do this check before shifting the extent records.
	 */
	for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {

		start = hfsmp->vcbFreeExt[i].startBlock;
		end = start + hfsmp->vcbFreeExt[i].blockCount;

		if (((startBlock >= start) && (startBlock < end)) ||
		    ((startBlock < start) && (startBlock + blockCount) > start)) {
			goto out;
		}
	}

	/* Scan the free extent cache array from tail to head till
	 * we find the entry after which our new entry should be
	 * inserted.  After we break out of this loop, the new entry
	 * will be inserted at 'i+1'.
	 */
	for (i = (int)hfsmp->vcbFreeExtCnt-1; i >= 0; i--) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* For sparse devices, find the entry with a smaller start block than ours */
			if (hfsmp->vcbFreeExt[i].startBlock < startBlock) {
				break;
			}
		} else {
			/* For normal devices, find the entry with a block count no smaller than ours */
			if (hfsmp->vcbFreeExt[i].blockCount >= blockCount) {
				break;
			}
		}

		/* If this is not the right spot to insert, and this is
		 * not the last entry in the array, just shift it and
		 * continue checking the next one.
		 */
		if ((i+1) < kMaxFreeExtents) {
			hfsmp->vcbFreeExt[i+1] = hfsmp->vcbFreeExt[i];
		}
	}
	/* The loop exits with 'i' at the entry after which the new extent
	 * belongs, so insert it at 'i+1'.
	 */
	hfsmp->vcbFreeExt[i+1].startBlock = startBlock;
	hfsmp->vcbFreeExt[i+1].blockCount = blockCount;
	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) {
		hfsmp->vcbFreeExtCnt++;
	}
	retval = true;

out:
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
out_not_locked:
	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);

	return retval;
}
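
/*
 * Worked example of the sorted insert above (hypothetical values): on a
 * normal (non-sparse) volume the cache is sorted by blockCount, descending.
 * Starting from { (100,50), (300,20), (40,8) } as (startBlock, blockCount)
 * pairs, adding a freed extent (500,30) walks from the tail, shifts the
 * smaller entries (40,8) and (300,20) down one slot each, stops at (100,50)
 * since 50 >= 30, and inserts at index 1, yielding
 * { (100,50), (500,30), (300,20), (40,8) }.  On sparse devices the same walk
 * orders by ascending startBlock instead, to favor low offsets.
 */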

/* Debug function to check if the free extent cache is good or not */
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
{
	u_int32_t i, j;

	/* Do not do anything if debug is not on, or if we're using the red-black tree */
	if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
		return;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/*
	 * Iterate the free extent cache and ensure no entries are bogus or refer to
	 * allocated blocks.
	 */
	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
		u_int32_t start, nblocks;

		start   = hfsmp->vcbFreeExt[i].startBlock;
		nblocks = hfsmp->vcbFreeExt[i].blockCount;

		//printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);

		/* Check if any of the blocks in the free extent cache are allocated.
		 * This should not be enabled always because it might take
		 * very long for large extents that get added to the list.
		 *
		 * We have to drop the vcbFreeExtLock while we call hfs_isallocated
		 * because it is going to do I/O.  Note that the free extent
		 * cache could change.  That's a risk we take when using this
		 * debugging code.  (Another alternative would be to try to
		 * detect when the free extent cache changed, and perhaps
		 * restart if the list changed while we dropped the lock.)
		 */
		if (check_allocated) {
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (hfs_isallocated(hfsmp, start, nblocks)) {
				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
					  hfsmp, i, start, nblocks);
			}
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
		}

		/* Check if any part of the extent is beyond allocLimit */
		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
				   hfsmp, i, start, nblocks, hfsmp->allocLimit);
		}

		/* Check if there are any duplicate start blocks */
		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
			if (start == hfsmp->vcbFreeExt[j].startBlock) {
				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicates\n",
					  hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock,
					  hfsmp->vcbFreeExt[j].blockCount);
			}
		}

		/* Check if the entries are out of order */
		if ((i+1) != hfsmp->vcbFreeExtCnt) {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				/* sparse devices are sorted by starting block number (ascending) */
				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n",
						   hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
						   hfsmp->vcbFreeExt[i+1].blockCount);
				}
			} else {
				/* normally sorted by block count (descending) */
				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n",
						   hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock,
						   hfsmp->vcbFreeExt[i+1].blockCount);
				}
			}
		}
	}
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}