]> git.saurik.com Git - apple/xnu.git/blame - bsd/hfs/hfs_btreeio.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
CommitLineData
1c79356b 1/*
13f56ec4 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
1c79356b
A
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/buf.h>
2d21ac55 32#include <sys/buf_internal.h>
9bccf70c 33#include <sys/kernel.h>
91447636 34#include <sys/malloc.h>
1c79356b
A
35#include <sys/mount.h>
36#include <sys/vnode.h>
37
38
39#include "hfs.h"
9bccf70c 40#include "hfs_cnode.h"
1c79356b
A
41#include "hfs_dbg.h"
42#include "hfs_endian.h"
2d21ac55 43#include "hfs_btreeio.h"
1c79356b
A
44
45#include "hfscommon/headers/FileMgrInternal.h"
46#include "hfscommon/headers/BTreesPrivate.h"
47
48#define FORCESYNCBTREEWRITES 0
49
2d21ac55
A
50/* From bsd/vfs/vfs_bio.c */
51extern int bdwrite_internal(struct buf *, int);
1c79356b 52
39236c6e 53static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount);
2d21ac55 54static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp);
1c79356b 55
6d2010ae 56void btree_swap_node(struct buf *bp, __unused void *arg);
1c79356b 57
13f56ec4
A
58/*
59 * Return btree node size for given vnode.
60 *
61 * Returns:
62 * For btree vnode, returns btree node size.
63 * For non-btree vnodes, returns 0.
64 */
65u_int16_t get_btree_nodesize(struct vnode *vp)
66{
67 BTreeControlBlockPtr btree;
68 u_int16_t node_size = 0;
69
70 if (vnode_issystem(vp)) {
71 btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr;
72 if (btree) {
73 node_size = btree->nodeSize;
74 }
75 }
76
77 return node_size;
78}
79
2d21ac55 80OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
1c79356b
A
81{
82 BTreeControlBlockPtr bTreePtr;
83
84 DBG_ASSERT(vp != NULL);
1c79356b
A
85 DBG_ASSERT(blockSize >= kMinNodeSize);
86 if (blockSize > MAXBSIZE )
87 return (fsBTBadNodeSize);
88
9bccf70c 89 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
1c79356b
A
90 bTreePtr->nodeSize = blockSize;
91
92 return (E_NONE);
93}
94
95
2d21ac55 96OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
1c79356b
A
97{
98 OSStatus retval = E_NONE;
99 struct buf *bp = NULL;
935ed37a
A
100 u_int8_t allow_empty_node;
101
102 /* If the btree block is being read using hint, it is
103 * fine for the swap code to find zeroed out nodes.
104 */
105 if (options & kGetBlockHint) {
106 allow_empty_node = true;
107 } else {
108 allow_empty_node = false;
109 }
1c79356b 110
91447636
A
111 if (options & kGetEmptyBlock) {
112 daddr64_t blkno;
113 off_t offset;
1c79356b 114
91447636
A
115 offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize;
116 bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META);
117 if (bp &&
118 VNOP_BLOCKMAP(vp, offset, block->blockSize, &blkno, NULL, NULL, 0, NULL) == 0) {
119 buf_setblkno(bp, blkno);
120 }
121 } else {
122 retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp);
123 }
1c79356b
A
124 if (bp == NULL)
125 retval = -1; //XXX need better error
126
127 if (retval == E_NONE) {
128 block->blockHeader = bp;
91447636 129 block->buffer = (char *)buf_dataptr(bp);
3a60a9f5 130 block->blockNum = buf_lblkno(bp);
91447636 131 block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */
1c79356b 132
b4c24cb9
A
133 // XXXdbg
134 block->isModified = 0;
135
39236c6e
A
136 /* Check and endian swap B-Tree node (only if it's a valid block) */
137 if (!(options & kGetEmptyBlock)) {
138
139 /* This happens when we first open the b-tree, we might not have all the node data on hand */
140 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
141 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
142 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
1c79356b 143
3a60a9f5 144 /*
39236c6e
A
145 * Don't swap the node descriptor, record offsets, or other records.
146 * This record will be invalidated and re-read with the correct node
147 * size once the B-tree control block is set up with the node size
148 * from the header record.
3a60a9f5 149 */
39236c6e
A
150 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
151
152 } else {
153 /*
154 * In this case, we have enough data in-hand to do basic validation
155 * on the B-Tree node.
156 */
157 if (block->blockReadFromDisk) {
158 /*
159 * The node was just read from disk, so always swap/check it.
160 * This is necessary on big endian since the test below won't trigger.
161 */
162 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
163 }
164 else {
165 /*
166 * Block wasn't read from disk; it was found in the cache.
167 */
168 if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
169 /*
170 * The node was left in the cache in non-native order, so swap it.
171 * This only happens on little endian, after the node is written
172 * back to disk.
173 */
174 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
175 }
176 else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) {
177 /*
178 * The node was in-cache in native-endianness. We don't need to do
179 * anything here, because the node is ready to use. Set retval == 0.
180 */
181 retval = 0;
182 }
183 /*
184 * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer,
185 * it doesn't necessarily mean that this is a bad node. Zeroed nodes that are
186 * marked as unused in the b-tree map node would be OK and not have valid content.
187 */
188 }
189 }
190
191 /*
192 * If we got an error, then the node is only partially swapped.
193 * We mark the buffer invalid so that the next attempt to get the
194 * node will read it and attempt to swap again, and will notice
195 * the error again. If we didn't do this, the next attempt to get
196 * the node might use the partially swapped node as-is.
197 */
198 if (retval)
3a60a9f5 199 buf_markinvalid(bp);
39236c6e
A
200 }
201 }
3a60a9f5
A
202
203 if (retval) {
1c79356b 204 if (bp)
3a60a9f5 205 buf_brelse(bp);
1c79356b
A
206 block->blockHeader = NULL;
207 block->buffer = NULL;
208 }
209
210 return (retval);
211}
212
213
b4c24cb9
A
214void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
215{
216 struct hfsmount *hfsmp = VTOHFS(vp);
217 struct buf *bp = NULL;
218
219 if (hfsmp->jnl == NULL) {
220 return;
221 }
222
223 bp = (struct buf *) blockPtr->blockHeader;
224 if (bp == NULL) {
b0d623f7 225 panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
b4c24cb9
A
226 return;
227 }
228
229 journal_modify_block_start(hfsmp->jnl, bp);
230 blockPtr->isModified = 1;
231}
232
6d2010ae 233void
2d21ac55 234btree_swap_node(struct buf *bp, __unused void *arg)
55e303ae 235{
2d21ac55 236 // struct hfsmount *hfsmp = (struct hfsmount *)arg;
3a60a9f5 237 int retval;
91447636 238 struct vnode *vp = buf_vnode(bp);
55e303ae
A
239 BlockDescriptor block;
240
241 /* Prepare the block pointer */
242 block.blockHeader = bp;
91447636 243 block.buffer = (char *)buf_dataptr(bp);
3a60a9f5 244 block.blockNum = buf_lblkno(bp);
55e303ae 245 /* not found in cache ==> came from disk */
91447636
A
246 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
247 block.blockSize = buf_count(bp);
55e303ae 248
935ed37a
A
249 /* Swap the data now that this node is ready to go to disk.
250 * We allow swapping of zeroed out nodes here because we might
251 * be writing node whose last record just got deleted.
252 */
253 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
3a60a9f5 254 if (retval)
b0d623f7 255 panic("hfs: btree_swap_node: about to write corrupt node!\n");
2d21ac55
A
256}
257
55e303ae 258
2d21ac55
A
259static int
260btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
261{
262 return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp);
55e303ae
A
263}
264
b4c24cb9 265
1c79356b
A
266OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
267{
b4c24cb9 268 struct hfsmount *hfsmp = VTOHFS(vp);
1c79356b
A
269 OSStatus retval = E_NONE;
270 struct buf *bp = NULL;
271
272 bp = (struct buf *) blockPtr->blockHeader;
273
274 if (bp == NULL) {
1c79356b
A
275 retval = -1;
276 goto exit;
277 }
278
279 if (options & kTrashBlock) {
91447636
A
280 buf_markinvalid(bp);
281
282 if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) {
b4c24cb9
A
283 journal_kill_block(hfsmp->jnl, bp);
284 } else {
91447636 285 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
b4c24cb9 286 }
d1ecb069
A
287
288 /* Don't let anyone else try to use this bp, it's been consumed */
289 blockPtr->blockHeader = NULL;
290
1c79356b
A
291 } else {
292 if (options & kForceWriteBlock) {
b4c24cb9
A
293 if (hfsmp->jnl) {
294 if (blockPtr->isModified == 0) {
2d21ac55 295 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp);
b4c24cb9 296 }
55e303ae
A
297
298 retval = btree_journal_modify_block_end(hfsmp, bp);
b4c24cb9
A
299 blockPtr->isModified = 0;
300 } else {
91447636 301 retval = VNOP_BWRITE(bp);
b4c24cb9 302 }
d1ecb069
A
303
304 /* Don't let anyone else try to use this bp, it's been consumed */
305 blockPtr->blockHeader = NULL;
306
1c79356b 307 } else if (options & kMarkBlockDirty) {
91447636
A
308 struct timeval tv;
309 microuptime(&tv);
b4c24cb9 310 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
9bccf70c
A
311 /*
312 *
91447636 313 * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move
9bccf70c 314 * the buffer onto the LOCKED free list. This is necessary, otherwise
91447636 315 * getnewbuf() would try to reclaim the buffers using buf_bawrite, which
9bccf70c
A
316 * isn't going to work.
317 *
318 */
9bccf70c
A
319 /* Don't hog all the buffers... */
320 if (count_lock_queue() > kMaxLockedMetaBuffers) {
321 hfs_btsync(vp, HFS_SYNCTRANS);
322 /* Rollback sync time to cause a sync on lock release... */
91447636 323 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
9bccf70c 324 }
91447636 325 buf_setflags(bp, B_LOCKED);
b4c24cb9
A
326 }
327
9bccf70c
A
328 /*
329 * Delay-write this block.
330 * If the maximum delayed buffers has been exceeded then
331 * free up some buffers and fall back to an asynchronous write.
332 */
b4c24cb9
A
333 if (hfsmp->jnl) {
334 if (blockPtr->isModified == 0) {
2d21ac55 335 panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp);
b4c24cb9 336 }
55e303ae 337 retval = btree_journal_modify_block_end(hfsmp, bp);
b4c24cb9
A
338 blockPtr->isModified = 0;
339 } else if (bdwrite_internal(bp, 1) != 0) {
9bccf70c
A
340 hfs_btsync(vp, 0);
341 /* Rollback sync time to cause a sync on lock release... */
91447636
A
342 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
343
344 buf_clearflags(bp, B_LOCKED);
345 buf_bawrite(bp);
9bccf70c 346 }
d1ecb069
A
347
348 /* Don't let anyone else try to use this bp, it's been consumed */
349 blockPtr->blockHeader = NULL;
350
1c79356b 351 } else {
b4c24cb9 352 // check if we had previously called journal_modify_block_start()
91447636 353 // on this block and if so, abort it (which will call buf_brelse()).
b4c24cb9
A
354 if (hfsmp->jnl && blockPtr->isModified) {
355 // XXXdbg - I don't want to call modify_block_abort()
356 // because I think it may be screwing up the
357 // journal and blowing away a block that has
358 // valid data in it.
359 //
360 // journal_modify_block_abort(hfsmp->jnl, bp);
361 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
55e303ae 362 btree_journal_modify_block_end(hfsmp, bp);
b4c24cb9
A
363 blockPtr->isModified = 0;
364 } else {
91447636 365 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
b4c24cb9 366 }
d1ecb069
A
367
368 /* Don't let anyone else try to use this bp, it's been consumed */
369 blockPtr->blockHeader = NULL;
370 }
371 }
1c79356b
A
372
373exit:
374 return (retval);
375}
376
377
378OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
379{
380#pragma unused (maxEOF)
381
91447636 382 OSStatus retval = 0, ret = 0;
6601e61a
A
383 int64_t actualBytesAdded, origSize;
384 u_int64_t bytesToAdd;
9bccf70c
A
385 u_int32_t startAllocation;
386 u_int32_t fileblocks;
6601e61a 387 BTreeInfoRec btInfo;
1c79356b 388 ExtendedVCB *vcb;
6601e61a
A
389 FCB *filePtr;
390 struct proc *p = NULL;
391 int64_t trim = 0;
392 int lockflags = 0;
1c79356b
A
393
394 filePtr = GetFileControlBlock(vp);
395
6601e61a 396 if ( (off_t)minEOF > filePtr->fcbEOF )
1c79356b
A
397 {
398 bytesToAdd = minEOF - filePtr->fcbEOF;
399
9bccf70c
A
400 if (bytesToAdd < filePtr->ff_clumpsize)
401 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
1c79356b
A
402 }
403 else
404 {
1c79356b
A
405 return -1;
406 }
407
9bccf70c 408 vcb = VTOVCB(vp);
6601e61a 409
1c79356b
A
410 /*
411 * The Extents B-tree can't have overflow extents. ExtendFileC will
412 * return an error if an attempt is made to extend the Extents B-tree
413 * when the resident extents are exhausted.
414 */
1c79356b 415
91447636
A
416 /* Protect allocation bitmap and extents overflow file. */
417 lockflags = SFL_BITMAP;
418 if (VTOC(vp)->c_fileid != kHFSExtentsFileID)
419 lockflags |= SFL_EXTENTS;
420 lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK);
421
422 (void) BTGetInformation(filePtr, 0, &btInfo);
1c79356b 423
b4c24cb9 424#if 0 // XXXdbg
1c79356b 425 /*
9bccf70c 426 * The b-tree code expects nodes to be contiguous. So when
1c79356b 427 * the allocation block size is less than the b-tree node
9bccf70c
A
428 * size, we need to force disk allocations to be contiguous.
429 */
1c79356b
A
430 if (vcb->blockSize >= btInfo.nodeSize) {
431 extendFlags = 0;
432 } else {
433 /* Ensure that all b-tree nodes are contiguous on disk */
b4c24cb9 434 extendFlags = kEFContigMask;
1c79356b 435 }
b4c24cb9 436#endif
1c79356b 437
b4c24cb9 438 origSize = filePtr->fcbEOF;
9bccf70c
A
439 fileblocks = filePtr->ff_blocks;
440 startAllocation = vcb->nextAllocation;
1c79356b 441
b4c24cb9
A
442 // loop trying to get a contiguous chunk that's an integer multiple
443 // of the btree node size. if we can't get a contiguous chunk that
444 // is at least the node size then we break out of the loop and let
445 // the error propagate back up.
6601e61a
A
446 while((off_t)bytesToAdd >= btInfo.nodeSize) {
447 do {
55e303ae 448 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
6601e61a
A
449 kEFContigMask | kEFMetadataMask | kEFNoClumpMask,
450 (int64_t *)&actualBytesAdded);
b4c24cb9 451 if (retval == dskFulErr && actualBytesAdded == 0) {
6601e61a
A
452 bytesToAdd >>= 1;
453 if (bytesToAdd < btInfo.nodeSize) {
454 break;
455 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
456 // make sure it's an integer multiple of the nodeSize
457 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
458 }
b4c24cb9 459 }
6601e61a
A
460 } while (retval == dskFulErr && actualBytesAdded == 0);
461
462 if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) {
463 break;
464 }
465
466 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
467 bytesToAdd = minEOF - filePtr->fcbEOF;
468 }
91447636 469
9bccf70c
A
470 /*
471 * If a new extent was added then move the roving allocator
472 * reference forward by the current b-tree file size so
473 * there's plenty of room to grow.
474 */
475 if ((retval == 0) &&
55e303ae 476 ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
9bccf70c 477 (vcb->nextAllocation > startAllocation) &&
2d21ac55
A
478 ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) {
479 HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks);
9bccf70c
A
480 }
481
b4c24cb9
A
482 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
483
484 // XXXdbg ExtendFileC() could have returned an error even though
485 // it grew the file to be big enough for our needs. If this is
486 // the case, we don't care about retval so we blow it away.
487 //
6601e61a 488 if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) {
b4c24cb9
A
489 retval = 0;
490 }
491
492 // XXXdbg if the file grew but isn't large enough or isn't an
493 // even multiple of the nodeSize then trim things back. if
494 // the file isn't large enough we trim back to the original
495 // size. otherwise we trim back to be an even multiple of the
496 // btree node size.
497 //
6601e61a 498 if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) {
b4c24cb9 499
6601e61a 500 if (filePtr->fcbEOF < (off_t)minEOF) {
b4c24cb9
A
501 retval = dskFulErr;
502
503 if (filePtr->fcbEOF < origSize) {
504 panic("hfs: btree file eof %lld less than orig size %lld!\n",
505 filePtr->fcbEOF, origSize);
506 }
507
508 trim = filePtr->fcbEOF - origSize;
b4c24cb9 509 } else {
6601e61a 510 trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
b4c24cb9
A
511 }
512
6d2010ae 513 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
b4c24cb9
A
514 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
515
516 // XXXdbg - panic if the file didn't get trimmed back properly
517 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
6601e61a 518 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n",
b4c24cb9
A
519 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
520 }
521
522 if (ret) {
523 // XXXdbg - this probably doesn't need to be a panic()
6601e61a 524 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
b0d623f7 525 filePtr->fcbEOF, trim, (long)ret);
91447636 526 goto out;
b4c24cb9 527 }
b4c24cb9
A
528 }
529
9bccf70c
A
530 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
531 /*
532 * Get any extents overflow b-tree changes to disk ASAP!
533 */
b4c24cb9 534 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
91447636 535 (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p);
9bccf70c 536 }
91447636
A
537 hfs_systemfile_unlock(vcb, lockflags);
538 lockflags = 0;
1c79356b 539
b4c24cb9 540 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
6601e61a 541 panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
b4c24cb9
A
542 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
543 }
544
1c79356b
A
545 /*
546 * Update the Alternate MDB or Alternate VolumeHeader
547 */
3e170ce0 548 VTOC(vp)->c_flag |= C_MODIFIED;
9bccf70c
A
549 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
550 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
551 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
1c79356b
A
552 ) {
553 MarkVCBDirty( vcb );
3e170ce0 554 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
55e303ae 555 } else {
91447636
A
556 VTOC(vp)->c_touch_chgtime = TRUE;
557 VTOC(vp)->c_touch_modtime = TRUE;
3e170ce0 558 (void) hfs_update(vp, 0);
1c79356b 559 }
b4c24cb9 560
6601e61a 561 ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
91447636
A
562out:
563 if (retval == 0)
564 retval = ret;
565
566 if (lockflags)
567 hfs_systemfile_unlock(vcb, lockflags);
1c79356b
A
568
569 return retval;
570}
571
572
1c79356b
A
573/*
574 * Clear out (zero) new b-tree nodes on disk.
575 */
576static int
39236c6e 577ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount)
1c79356b 578{
b4c24cb9 579 struct hfsmount *hfsmp = VTOHFS(vp);
1c79356b 580 struct buf *bp = NULL;
91447636
A
581 daddr64_t blk;
582 daddr64_t blkcnt;
1c79356b
A
583
584 blk = offset / blksize;
585 blkcnt = amount / blksize;
586
587 while (blkcnt > 0) {
91447636 588 bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META);
1c79356b
A
589 if (bp == NULL)
590 continue;
b4c24cb9
A
591
592 // XXXdbg
593 if (hfsmp->jnl) {
594 // XXXdbg -- skipping this for now since it makes a transaction
595 // become *way* too large
596 //journal_modify_block_start(hfsmp->jnl, bp);
597 }
91447636 598 bzero((char *)buf_dataptr(bp), blksize);
b4c24cb9 599
91447636 600 buf_markaged(bp);
1c79356b 601
b4c24cb9
A
602 // XXXdbg
603 if (hfsmp->jnl) {
604 // XXXdbg -- skipping this for now since it makes a transaction
605 // become *way* too large
606 //journal_modify_block_end(hfsmp->jnl, bp);
607
608 // XXXdbg - remove this once we decide what to do with the
609 // writes to the journal
610 if ((blk % 32) == 0)
91447636 611 VNOP_BWRITE(bp);
b4c24cb9 612 else
91447636 613 buf_bawrite(bp);
b4c24cb9
A
614 } else {
615 /* wait/yield every 32 blocks so we don't hog all the buffers */
616 if ((blk % 32) == 0)
91447636 617 VNOP_BWRITE(bp);
b4c24cb9 618 else
91447636 619 buf_bawrite(bp);
b4c24cb9 620 }
1c79356b
A
621 --blkcnt;
622 ++blk;
623 }
624
625 return (0);
626}
91447636
A
627
628
629extern char hfs_attrname[];
630
91447636
A
631/*
632 * Create an HFS+ Attribute B-tree File.
633 *
2d21ac55 634 * No global resources should be held.
91447636
A
635 */
636int
2d21ac55 637hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt)
91447636 638{
2d21ac55 639 struct vnode* vp = NULLVP;
91447636
A
640 struct cat_desc cndesc;
641 struct cat_attr cnattr;
642 struct cat_fork cfork;
643 BlockDescriptor blkdesc;
644 BTNodeDescriptor *ndp;
645 BTHeaderRec *bthp;
646 BTreeControlBlockPtr btcb = NULL;
647 struct buf *bp = NULL;
648 void * buffer;
b0d623f7 649 u_int8_t *bitmap;
91447636 650 u_int16_t *index;
b0d623f7
A
651 u_int32_t node_num, num_map_nodes;
652 u_int32_t bytes_per_map_record;
653 u_int32_t temp;
91447636 654 u_int16_t offset;
2d21ac55 655 int intrans = 0;
91447636 656 int result;
6d2010ae
A
657 int newvnode_flags = 0;
658
2d21ac55
A
659again:
660 /*
661 * Serialize creation using HFS_CREATING_BTREE flag.
662 */
39236c6e 663 hfs_lock_mount (hfsmp);
2d21ac55
A
664 if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
665 /* Someone else beat us, wait for them to finish. */
fe8ab488 666 (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
2d21ac55
A
667 PDROP | PINOD, "hfs_create_attr_btree", 0);
668 if (hfsmp->hfs_attribute_vp) {
669 return (0);
670 }
671 goto again;
672 }
673 hfsmp->hfs_flags |= HFS_CREATING_BTREE;
39236c6e 674 hfs_unlock_mount (hfsmp);
91447636 675
2d21ac55
A
676 /* Check if were out of usable disk space. */
677 if ((hfs_freeblks(hfsmp, 1) == 0)) {
678 result = ENOSPC;
679 goto exit;
680 }
91447636
A
681
682 /*
683 * Set up Attribute B-tree vnode
2d21ac55
A
684 * (this must be done before we start a transaction
685 * or take any system file locks)
91447636
A
686 */
687 bzero(&cndesc, sizeof(cndesc));
688 cndesc.cd_parentcnid = kHFSRootParentID;
689 cndesc.cd_flags |= CD_ISMETA;
2d21ac55 690 cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname;
91447636
A
691 cndesc.cd_namelen = strlen(hfs_attrname);
692 cndesc.cd_cnid = kHFSAttributesFileID;
693
694 bzero(&cnattr, sizeof(cnattr));
2d21ac55 695 cnattr.ca_linkcount = 1;
91447636
A
696 cnattr.ca_mode = S_IFREG;
697 cnattr.ca_fileid = cndesc.cd_cnid;
698
699 bzero(&cfork, sizeof(cfork));
700 cfork.cf_clump = nodesize * nodecnt;
701
6d2010ae
A
702 result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
703 &cfork, &vp, &newvnode_flags);
2d21ac55
A
704 if (result) {
705 goto exit;
706 }
91447636
A
707 /*
708 * Set up Attribute B-tree control block
709 */
710 MALLOC(btcb, BTreeControlBlock *, sizeof(BTreeControlBlock), M_TEMP, M_WAITOK);
711 bzero(btcb, sizeof(BTreeControlBlock));
712
713 btcb->nodeSize = nodesize;
714 btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
715 btcb->btreeType = 0xFF;
716 btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
717 btcb->version = kBTreeVersion;
718 btcb->writeCount = 1;
719 btcb->flags = 0; /* kBTHeaderDirty */
720 btcb->fileRefNum = vp;
721 btcb->getBlockProc = GetBTreeBlock;
722 btcb->releaseBlockProc = ReleaseBTreeBlock;
723 btcb->setEndOfForkProc = ExtendBTreeFile;
724 btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare;
725 VTOF(vp)->fcbBTCBPtr = btcb;
726
727 /*
728 * Allocate some space
729 */
2d21ac55
A
730 if (hfs_start_transaction(hfsmp) != 0) {
731 result = EINVAL;
732 goto exit;
733 }
734 intrans = 1;
735
736 /* Note ExtendBTreeFile will acquire the necessary system file locks. */
91447636
A
737 result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump);
738 if (result)
739 goto exit;
740
741 btcb->totalNodes = VTOF(vp)->ff_size / nodesize;
91447636 742
b0d623f7
A
743 /*
744 * Figure out how many map nodes we'll need.
745 *
746 * bytes_per_map_record = the number of bytes in the map record of a
747 * map node. Since that is the only record in the node, it is the size
748 * of the node minus the node descriptor at the start, and two record
749 * offsets at the end of the node. The "- 2" is to round the size down
750 * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
751 * multiple of 4).
752 *
753 * The value "temp" here is the number of *bits* in the map record of
754 * the header node.
755 */
756 bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
757 temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
758 - sizeof(BTHeaderRec)
759 - kBTreeHeaderUserBytes
760 - 4 * sizeof(u_int16_t));
761 if (btcb->totalNodes > temp) {
762 num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
763 }
764 else {
765 num_map_nodes = 0;
766 }
767
768 btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;
769
91447636
A
770 /*
771 * Initialize the b-tree header on disk
772 */
773 bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META);
774 if (bp == NULL) {
775 result = EIO;
776 goto exit;
777 }
778
779 buffer = (void *)buf_dataptr(bp);
780 blkdesc.buffer = buffer;
781 blkdesc.blockHeader = (void *)bp;
782 blkdesc.blockReadFromDisk = 0;
783 blkdesc.isModified = 0;
784
785 ModifyBlockStart(vp, &blkdesc);
786
787 if (buf_size(bp) != nodesize)
788 panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp));
789
790 bzero(buffer, nodesize);
2d21ac55 791 index = (u_int16_t *)buffer;
91447636
A
792
793 /* FILL IN THE NODE DESCRIPTOR: */
794 ndp = (BTNodeDescriptor *)buffer;
b0d623f7
A
795 if (num_map_nodes != 0)
796 ndp->fLink = 1;
91447636
A
797 ndp->kind = kBTHeaderNode;
798 ndp->numRecords = 3;
799 offset = sizeof(BTNodeDescriptor);
800 index[(nodesize / 2) - 1] = offset;
801
802 /* FILL IN THE HEADER RECORD: */
2d21ac55 803 bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
91447636
A
804 bthp->nodeSize = nodesize;
805 bthp->totalNodes = btcb->totalNodes;
806 bthp->freeNodes = btcb->freeNodes;
807 bthp->clumpSize = cfork.cf_clump;
808 bthp->btreeType = 0xFF;
809 bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
810 bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
811 bthp->keyCompareType = kHFSBinaryCompare;
812 offset += sizeof(BTHeaderRec);
813 index[(nodesize / 2) - 2] = offset;
814
815 /* FILL IN THE USER RECORD: */
816 offset += kBTreeHeaderUserBytes;
817 index[(nodesize / 2) - 3] = offset;
818
b0d623f7
A
819 /* Mark the header node and map nodes in use in the map record.
820 *
821 * NOTE: Assumes that the header node's map record has at least
822 * (num_map_nodes + 1) bits.
823 */
824 bitmap = (u_int8_t *) buffer + offset;
825 temp = num_map_nodes + 1; /* +1 for the header node */
826 while (temp >= 8) {
827 *(bitmap++) = 0xFF;
828 temp -= 8;
829 }
830 *bitmap = ~(0xFF >> temp);
831
91447636
A
832 offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
833 - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
834 index[(nodesize / 2) - 4] = offset;
835
836 if (hfsmp->jnl) {
837 result = btree_journal_modify_block_end(hfsmp, bp);
838 } else {
839 result = VNOP_BWRITE(bp);
840 }
841 if (result)
842 goto exit;
843
b0d623f7
A
844 /* Create the map nodes: node numbers 1 .. num_map_nodes */
845 for (node_num=1; node_num <= num_map_nodes; ++node_num) {
846 bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META);
847 if (bp == NULL) {
848 result = EIO;
849 goto exit;
850 }
851 buffer = (void *)buf_dataptr(bp);
852 blkdesc.buffer = buffer;
853 blkdesc.blockHeader = (void *)bp;
854 blkdesc.blockReadFromDisk = 0;
855 blkdesc.isModified = 0;
856
857 ModifyBlockStart(vp, &blkdesc);
858
859 bzero(buffer, nodesize);
860 index = (u_int16_t *)buffer;
861
862 /* Fill in the node descriptor */
863 ndp = (BTNodeDescriptor *)buffer;
864 if (node_num != num_map_nodes)
865 ndp->fLink = node_num + 1;
866 ndp->kind = kBTMapNode;
867 ndp->numRecords = 1;
868 offset = sizeof(BTNodeDescriptor);
869 index[(nodesize / 2) - 1] = offset;
870
871
872 /* Fill in the map record's offset */
873 /* Note: We assume that the map record is all zeroes */
874 offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
875 index[(nodesize / 2) - 2] = offset;
876
877 if (hfsmp->jnl) {
878 result = btree_journal_modify_block_end(hfsmp, bp);
879 } else {
880 result = VNOP_BWRITE(bp);
881 }
882 if (result)
883 goto exit;
884 }
885
2d21ac55 886 /* Update vp/cp for attribute btree */
39236c6e 887 hfs_lock_mount (hfsmp);
2d21ac55 888 hfsmp->hfs_attribute_cp = VTOC(vp);
91447636 889 hfsmp->hfs_attribute_vp = vp;
39236c6e 890 hfs_unlock_mount (hfsmp);
91447636 891
3e170ce0 892 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
316670eb
A
893
894 if (intrans) {
895 hfs_end_transaction(hfsmp);
896 intrans = 0;
897 }
898
899 /* Initialize the vnode for virtual attribute data file */
900 result = init_attrdata_vnode(hfsmp);
901 if (result) {
39236c6e 902 printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result);
316670eb
A
903 }
904
91447636 905exit:
2d21ac55
A
906 if (vp) {
907 hfs_unlock(VTOC(vp));
908 }
91447636
A
909 if (result) {
910 if (btcb) {
911 FREE (btcb, M_TEMP);
912 }
2d21ac55
A
913 if (vp) {
914 vnode_put(vp);
915 }
916 /* XXX need to give back blocks ? */
917 }
918 if (intrans) {
919 hfs_end_transaction(hfsmp);
91447636 920 }
91447636 921
2d21ac55
A
922 /*
923 * All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
924 */
39236c6e 925 hfs_lock_mount (hfsmp);
2d21ac55 926 hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
fe8ab488 927 wakeup((caddr_t)&hfsmp->hfs_attribute_cp);
39236c6e 928 hfs_unlock_mount (hfsmp);
91447636 929
2d21ac55
A
930 return (result);
931}
91447636 932