]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_btreeio.c
xnu-1699.22.81.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/buf_internal.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/vnode.h>
37
38
39 #include "hfs.h"
40 #include "hfs_cnode.h"
41 #include "hfs_dbg.h"
42 #include "hfs_endian.h"
43 #include "hfs_btreeio.h"
44
45 #include "hfscommon/headers/FileMgrInternal.h"
46 #include "hfscommon/headers/BTreesPrivate.h"
47
48 #define FORCESYNCBTREEWRITES 0
49
50 /* From bsd/vfs/vfs_bio.c */
51 extern int bdwrite_internal(struct buf *, int);
52
53 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
54 static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp);
55
56 void btree_swap_node(struct buf *bp, __unused void *arg);
57
58 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
59 {
60 BTreeControlBlockPtr bTreePtr;
61
62 DBG_ASSERT(vp != NULL);
63 DBG_ASSERT(blockSize >= kMinNodeSize);
64 if (blockSize > MAXBSIZE )
65 return (fsBTBadNodeSize);
66
67 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
68 bTreePtr->nodeSize = blockSize;
69
70 return (E_NONE);
71 }
72
73
74 OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
75 {
76 OSStatus retval = E_NONE;
77 struct buf *bp = NULL;
78 u_int8_t allow_empty_node;
79
80 /* If the btree block is being read using hint, it is
81 * fine for the swap code to find zeroed out nodes.
82 */
83 if (options & kGetBlockHint) {
84 allow_empty_node = true;
85 } else {
86 allow_empty_node = false;
87 }
88
89 if (options & kGetEmptyBlock) {
90 daddr64_t blkno;
91 off_t offset;
92
93 offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize;
94 bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META);
95 if (bp &&
96 VNOP_BLOCKMAP(vp, offset, block->blockSize, &blkno, NULL, NULL, 0, NULL) == 0) {
97 buf_setblkno(bp, blkno);
98 }
99 } else {
100 retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp);
101 }
102 if (bp == NULL)
103 retval = -1; //XXX need better error
104
105 if (retval == E_NONE) {
106 block->blockHeader = bp;
107 block->buffer = (char *)buf_dataptr(bp);
108 block->blockNum = buf_lblkno(bp);
109 block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */
110
111 // XXXdbg
112 block->isModified = 0;
113
114 /* Check and endian swap B-Tree node (only if it's a valid block) */
115 if (!(options & kGetEmptyBlock)) {
116 /* This happens when we first open the b-tree, we might not have all the node data on hand */
117 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
118 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
119 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
120
121 /*
122 * Don't swap the node descriptor, record offsets, or other records.
123 * This record will be invalidated and re-read with the correct node
124 * size once the B-tree control block is set up with the node size
125 * from the header record.
126 */
127 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
128
129 } else if (block->blockReadFromDisk) {
130 /*
131 * The node was just read from disk, so always swap/check it.
132 * This is necessary on big endian since the test below won't trigger.
133 */
134 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
135 } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
136 /*
137 * The node was left in the cache in non-native order, so swap it.
138 * This only happens on little endian, after the node is written
139 * back to disk.
140 */
141 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
142 }
143
144 /*
145 * If we got an error, then the node is only partially swapped.
146 * We mark the buffer invalid so that the next attempt to get the
147 * node will read it and attempt to swap again, and will notice
148 * the error again. If we didn't do this, the next attempt to get
149 * the node might use the partially swapped node as-is.
150 */
151 if (retval)
152 buf_markinvalid(bp);
153 }
154 }
155
156 if (retval) {
157 if (bp)
158 buf_brelse(bp);
159 block->blockHeader = NULL;
160 block->buffer = NULL;
161 }
162
163 return (retval);
164 }
165
166
167 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
168 {
169 struct hfsmount *hfsmp = VTOHFS(vp);
170 struct buf *bp = NULL;
171
172 if (hfsmp->jnl == NULL) {
173 return;
174 }
175
176 bp = (struct buf *) blockPtr->blockHeader;
177 if (bp == NULL) {
178 panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
179 return;
180 }
181
182 journal_modify_block_start(hfsmp->jnl, bp);
183 blockPtr->isModified = 1;
184 }
185
186 void
187 btree_swap_node(struct buf *bp, __unused void *arg)
188 {
189 // struct hfsmount *hfsmp = (struct hfsmount *)arg;
190 int retval;
191 struct vnode *vp = buf_vnode(bp);
192 BlockDescriptor block;
193
194 /* Prepare the block pointer */
195 block.blockHeader = bp;
196 block.buffer = (char *)buf_dataptr(bp);
197 block.blockNum = buf_lblkno(bp);
198 /* not found in cache ==> came from disk */
199 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
200 block.blockSize = buf_count(bp);
201
202 /* Swap the data now that this node is ready to go to disk.
203 * We allow swapping of zeroed out nodes here because we might
204 * be writing node whose last record just got deleted.
205 */
206 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
207 if (retval)
208 panic("hfs: btree_swap_node: about to write corrupt node!\n");
209 }
210
211
212 static int
213 btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
214 {
215 return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp);
216 }
217
218
219 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
220 {
221 struct hfsmount *hfsmp = VTOHFS(vp);
222 OSStatus retval = E_NONE;
223 struct buf *bp = NULL;
224
225 bp = (struct buf *) blockPtr->blockHeader;
226
227 if (bp == NULL) {
228 retval = -1;
229 goto exit;
230 }
231
232 if (options & kTrashBlock) {
233 buf_markinvalid(bp);
234
235 if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) {
236 journal_kill_block(hfsmp->jnl, bp);
237 } else {
238 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
239 }
240
241 /* Don't let anyone else try to use this bp, it's been consumed */
242 blockPtr->blockHeader = NULL;
243
244 } else {
245 if (options & kForceWriteBlock) {
246 if (hfsmp->jnl) {
247 if (blockPtr->isModified == 0) {
248 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp);
249 }
250
251 retval = btree_journal_modify_block_end(hfsmp, bp);
252 blockPtr->isModified = 0;
253 } else {
254 retval = VNOP_BWRITE(bp);
255 }
256
257 /* Don't let anyone else try to use this bp, it's been consumed */
258 blockPtr->blockHeader = NULL;
259
260 } else if (options & kMarkBlockDirty) {
261 struct timeval tv;
262 microuptime(&tv);
263 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
264 /*
265 *
266 * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move
267 * the buffer onto the LOCKED free list. This is necessary, otherwise
268 * getnewbuf() would try to reclaim the buffers using buf_bawrite, which
269 * isn't going to work.
270 *
271 */
272 /* Don't hog all the buffers... */
273 if (count_lock_queue() > kMaxLockedMetaBuffers) {
274 hfs_btsync(vp, HFS_SYNCTRANS);
275 /* Rollback sync time to cause a sync on lock release... */
276 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
277 }
278 buf_setflags(bp, B_LOCKED);
279 }
280
281 /*
282 * Delay-write this block.
283 * If the maximum delayed buffers has been exceeded then
284 * free up some buffers and fall back to an asynchronous write.
285 */
286 if (hfsmp->jnl) {
287 if (blockPtr->isModified == 0) {
288 panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp);
289 }
290 retval = btree_journal_modify_block_end(hfsmp, bp);
291 blockPtr->isModified = 0;
292 } else if (bdwrite_internal(bp, 1) != 0) {
293 hfs_btsync(vp, 0);
294 /* Rollback sync time to cause a sync on lock release... */
295 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
296
297 buf_clearflags(bp, B_LOCKED);
298 buf_bawrite(bp);
299 }
300
301 /* Don't let anyone else try to use this bp, it's been consumed */
302 blockPtr->blockHeader = NULL;
303
304 } else {
305 // check if we had previously called journal_modify_block_start()
306 // on this block and if so, abort it (which will call buf_brelse()).
307 if (hfsmp->jnl && blockPtr->isModified) {
308 // XXXdbg - I don't want to call modify_block_abort()
309 // because I think it may be screwing up the
310 // journal and blowing away a block that has
311 // valid data in it.
312 //
313 // journal_modify_block_abort(hfsmp->jnl, bp);
314 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
315 btree_journal_modify_block_end(hfsmp, bp);
316 blockPtr->isModified = 0;
317 } else {
318 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
319 }
320
321 /* Don't let anyone else try to use this bp, it's been consumed */
322 blockPtr->blockHeader = NULL;
323 }
324 }
325
326 exit:
327 return (retval);
328 }
329
330
331 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
332 {
333 #pragma unused (maxEOF)
334
335 OSStatus retval = 0, ret = 0;
336 int64_t actualBytesAdded, origSize;
337 u_int64_t bytesToAdd;
338 u_int32_t startAllocation;
339 u_int32_t fileblocks;
340 BTreeInfoRec btInfo;
341 ExtendedVCB *vcb;
342 FCB *filePtr;
343 struct proc *p = NULL;
344 int64_t trim = 0;
345 int lockflags = 0;
346
347 filePtr = GetFileControlBlock(vp);
348
349 if ( (off_t)minEOF > filePtr->fcbEOF )
350 {
351 bytesToAdd = minEOF - filePtr->fcbEOF;
352
353 if (bytesToAdd < filePtr->ff_clumpsize)
354 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
355 }
356 else
357 {
358 return -1;
359 }
360
361 vcb = VTOVCB(vp);
362
363 /*
364 * The Extents B-tree can't have overflow extents. ExtendFileC will
365 * return an error if an attempt is made to extend the Extents B-tree
366 * when the resident extents are exhausted.
367 */
368
369 /* Protect allocation bitmap and extents overflow file. */
370 lockflags = SFL_BITMAP;
371 if (VTOC(vp)->c_fileid != kHFSExtentsFileID)
372 lockflags |= SFL_EXTENTS;
373 lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK);
374
375 (void) BTGetInformation(filePtr, 0, &btInfo);
376
377 #if 0 // XXXdbg
378 /*
379 * The b-tree code expects nodes to be contiguous. So when
380 * the allocation block size is less than the b-tree node
381 * size, we need to force disk allocations to be contiguous.
382 */
383 if (vcb->blockSize >= btInfo.nodeSize) {
384 extendFlags = 0;
385 } else {
386 /* Ensure that all b-tree nodes are contiguous on disk */
387 extendFlags = kEFContigMask;
388 }
389 #endif
390
391 origSize = filePtr->fcbEOF;
392 fileblocks = filePtr->ff_blocks;
393 startAllocation = vcb->nextAllocation;
394
395 // loop trying to get a contiguous chunk that's an integer multiple
396 // of the btree node size. if we can't get a contiguous chunk that
397 // is at least the node size then we break out of the loop and let
398 // the error propagate back up.
399 while((off_t)bytesToAdd >= btInfo.nodeSize) {
400 do {
401 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
402 kEFContigMask | kEFMetadataMask | kEFNoClumpMask,
403 (int64_t *)&actualBytesAdded);
404 if (retval == dskFulErr && actualBytesAdded == 0) {
405 bytesToAdd >>= 1;
406 if (bytesToAdd < btInfo.nodeSize) {
407 break;
408 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
409 // make sure it's an integer multiple of the nodeSize
410 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
411 }
412 }
413 } while (retval == dskFulErr && actualBytesAdded == 0);
414
415 if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) {
416 break;
417 }
418
419 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
420 bytesToAdd = minEOF - filePtr->fcbEOF;
421 }
422
423 /*
424 * If a new extent was added then move the roving allocator
425 * reference forward by the current b-tree file size so
426 * there's plenty of room to grow.
427 */
428 if ((retval == 0) &&
429 ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
430 (vcb->nextAllocation > startAllocation) &&
431 ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) {
432 HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks);
433 }
434
435 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
436
437 // XXXdbg ExtendFileC() could have returned an error even though
438 // it grew the file to be big enough for our needs. If this is
439 // the case, we don't care about retval so we blow it away.
440 //
441 if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) {
442 retval = 0;
443 }
444
445 // XXXdbg if the file grew but isn't large enough or isn't an
446 // even multiple of the nodeSize then trim things back. if
447 // the file isn't large enough we trim back to the original
448 // size. otherwise we trim back to be an even multiple of the
449 // btree node size.
450 //
451 if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) {
452
453 if (filePtr->fcbEOF < (off_t)minEOF) {
454 retval = dskFulErr;
455
456 if (filePtr->fcbEOF < origSize) {
457 panic("hfs: btree file eof %lld less than orig size %lld!\n",
458 filePtr->fcbEOF, origSize);
459 }
460
461 trim = filePtr->fcbEOF - origSize;
462 } else {
463 trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
464 }
465
466 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
467 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
468
469 // XXXdbg - panic if the file didn't get trimmed back properly
470 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
471 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n",
472 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
473 }
474
475 if (ret) {
476 // XXXdbg - this probably doesn't need to be a panic()
477 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
478 filePtr->fcbEOF, trim, (long)ret);
479 goto out;
480 }
481 }
482
483 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
484 /*
485 * Get any extents overflow b-tree changes to disk ASAP!
486 */
487 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
488 (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p);
489 }
490 hfs_systemfile_unlock(vcb, lockflags);
491 lockflags = 0;
492
493 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
494 panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
495 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
496 }
497
498 /*
499 * Update the Alternate MDB or Alternate VolumeHeader
500 */
501 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
502 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
503 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
504 ) {
505 VTOC(vp)->c_flag |= C_MODIFIED;
506 MarkVCBDirty( vcb );
507 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
508 } else {
509 VTOC(vp)->c_touch_chgtime = TRUE;
510 VTOC(vp)->c_touch_modtime = TRUE;
511 (void) hfs_update(vp, TRUE);
512 }
513
514 ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
515 out:
516 if (retval == 0)
517 retval = ret;
518
519 if (lockflags)
520 hfs_systemfile_unlock(vcb, lockflags);
521
522 return retval;
523 }
524
525
526 /*
527 * Clear out (zero) new b-tree nodes on disk.
528 */
529 static int
530 ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
531 {
532 struct hfsmount *hfsmp = VTOHFS(vp);
533 struct buf *bp = NULL;
534 daddr64_t blk;
535 daddr64_t blkcnt;
536
537 blk = offset / blksize;
538 blkcnt = amount / blksize;
539
540 while (blkcnt > 0) {
541 bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META);
542 if (bp == NULL)
543 continue;
544
545 // XXXdbg
546 if (hfsmp->jnl) {
547 // XXXdbg -- skipping this for now since it makes a transaction
548 // become *way* too large
549 //journal_modify_block_start(hfsmp->jnl, bp);
550 }
551 bzero((char *)buf_dataptr(bp), blksize);
552
553 buf_markaged(bp);
554
555 // XXXdbg
556 if (hfsmp->jnl) {
557 // XXXdbg -- skipping this for now since it makes a transaction
558 // become *way* too large
559 //journal_modify_block_end(hfsmp->jnl, bp);
560
561 // XXXdbg - remove this once we decide what to do with the
562 // writes to the journal
563 if ((blk % 32) == 0)
564 VNOP_BWRITE(bp);
565 else
566 buf_bawrite(bp);
567 } else {
568 /* wait/yield every 32 blocks so we don't hog all the buffers */
569 if ((blk % 32) == 0)
570 VNOP_BWRITE(bp);
571 else
572 buf_bawrite(bp);
573 }
574 --blkcnt;
575 ++blk;
576 }
577
578 return (0);
579 }
580
581
582 extern char hfs_attrname[];
583
584 /*
585 * Create an HFS+ Attribute B-tree File.
586 *
587 * No global resources should be held.
588 */
589 int
590 hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt)
591 {
592 struct vnode* vp = NULLVP;
593 struct cat_desc cndesc;
594 struct cat_attr cnattr;
595 struct cat_fork cfork;
596 BlockDescriptor blkdesc;
597 BTNodeDescriptor *ndp;
598 BTHeaderRec *bthp;
599 BTreeControlBlockPtr btcb = NULL;
600 struct buf *bp = NULL;
601 void * buffer;
602 u_int8_t *bitmap;
603 u_int16_t *index;
604 u_int32_t node_num, num_map_nodes;
605 u_int32_t bytes_per_map_record;
606 u_int32_t temp;
607 u_int16_t offset;
608 int intrans = 0;
609 int result;
610 int newvnode_flags = 0;
611
612 again:
613 /*
614 * Serialize creation using HFS_CREATING_BTREE flag.
615 */
616 lck_mtx_lock(&hfsmp->hfs_mutex);
617 if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
618 /* Someone else beat us, wait for them to finish. */
619 (void) msleep(hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
620 PDROP | PINOD, "hfs_create_attr_btree", 0);
621 if (hfsmp->hfs_attribute_vp) {
622 return (0);
623 }
624 goto again;
625 }
626 hfsmp->hfs_flags |= HFS_CREATING_BTREE;
627 lck_mtx_unlock(&hfsmp->hfs_mutex);
628
629 /* Check if were out of usable disk space. */
630 if ((hfs_freeblks(hfsmp, 1) == 0)) {
631 result = ENOSPC;
632 goto exit;
633 }
634
635 /*
636 * Set up Attribute B-tree vnode
637 * (this must be done before we start a transaction
638 * or take any system file locks)
639 */
640 bzero(&cndesc, sizeof(cndesc));
641 cndesc.cd_parentcnid = kHFSRootParentID;
642 cndesc.cd_flags |= CD_ISMETA;
643 cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname;
644 cndesc.cd_namelen = strlen(hfs_attrname);
645 cndesc.cd_cnid = kHFSAttributesFileID;
646
647 bzero(&cnattr, sizeof(cnattr));
648 cnattr.ca_linkcount = 1;
649 cnattr.ca_mode = S_IFREG;
650 cnattr.ca_fileid = cndesc.cd_cnid;
651
652 bzero(&cfork, sizeof(cfork));
653 cfork.cf_clump = nodesize * nodecnt;
654
655 result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
656 &cfork, &vp, &newvnode_flags);
657 if (result) {
658 goto exit;
659 }
660 /*
661 * Set up Attribute B-tree control block
662 */
663 MALLOC(btcb, BTreeControlBlock *, sizeof(BTreeControlBlock), M_TEMP, M_WAITOK);
664 bzero(btcb, sizeof(BTreeControlBlock));
665
666 btcb->nodeSize = nodesize;
667 btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
668 btcb->btreeType = 0xFF;
669 btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
670 btcb->version = kBTreeVersion;
671 btcb->writeCount = 1;
672 btcb->flags = 0; /* kBTHeaderDirty */
673 btcb->fileRefNum = vp;
674 btcb->getBlockProc = GetBTreeBlock;
675 btcb->releaseBlockProc = ReleaseBTreeBlock;
676 btcb->setEndOfForkProc = ExtendBTreeFile;
677 btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare;
678 VTOF(vp)->fcbBTCBPtr = btcb;
679
680 /*
681 * Allocate some space
682 */
683 if (hfs_start_transaction(hfsmp) != 0) {
684 result = EINVAL;
685 goto exit;
686 }
687 intrans = 1;
688
689 /* Note ExtendBTreeFile will acquire the necessary system file locks. */
690 result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump);
691 if (result)
692 goto exit;
693
694 btcb->totalNodes = VTOF(vp)->ff_size / nodesize;
695
696 /*
697 * Figure out how many map nodes we'll need.
698 *
699 * bytes_per_map_record = the number of bytes in the map record of a
700 * map node. Since that is the only record in the node, it is the size
701 * of the node minus the node descriptor at the start, and two record
702 * offsets at the end of the node. The "- 2" is to round the size down
703 * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
704 * multiple of 4).
705 *
706 * The value "temp" here is the number of *bits* in the map record of
707 * the header node.
708 */
709 bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
710 temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
711 - sizeof(BTHeaderRec)
712 - kBTreeHeaderUserBytes
713 - 4 * sizeof(u_int16_t));
714 if (btcb->totalNodes > temp) {
715 num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
716 }
717 else {
718 num_map_nodes = 0;
719 }
720
721 btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;
722
723 /*
724 * Initialize the b-tree header on disk
725 */
726 bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META);
727 if (bp == NULL) {
728 result = EIO;
729 goto exit;
730 }
731
732 buffer = (void *)buf_dataptr(bp);
733 blkdesc.buffer = buffer;
734 blkdesc.blockHeader = (void *)bp;
735 blkdesc.blockReadFromDisk = 0;
736 blkdesc.isModified = 0;
737
738 ModifyBlockStart(vp, &blkdesc);
739
740 if (buf_size(bp) != nodesize)
741 panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp));
742
743 bzero(buffer, nodesize);
744 index = (u_int16_t *)buffer;
745
746 /* FILL IN THE NODE DESCRIPTOR: */
747 ndp = (BTNodeDescriptor *)buffer;
748 if (num_map_nodes != 0)
749 ndp->fLink = 1;
750 ndp->kind = kBTHeaderNode;
751 ndp->numRecords = 3;
752 offset = sizeof(BTNodeDescriptor);
753 index[(nodesize / 2) - 1] = offset;
754
755 /* FILL IN THE HEADER RECORD: */
756 bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
757 bthp->nodeSize = nodesize;
758 bthp->totalNodes = btcb->totalNodes;
759 bthp->freeNodes = btcb->freeNodes;
760 bthp->clumpSize = cfork.cf_clump;
761 bthp->btreeType = 0xFF;
762 bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
763 bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
764 bthp->keyCompareType = kHFSBinaryCompare;
765 offset += sizeof(BTHeaderRec);
766 index[(nodesize / 2) - 2] = offset;
767
768 /* FILL IN THE USER RECORD: */
769 offset += kBTreeHeaderUserBytes;
770 index[(nodesize / 2) - 3] = offset;
771
772 /* Mark the header node and map nodes in use in the map record.
773 *
774 * NOTE: Assumes that the header node's map record has at least
775 * (num_map_nodes + 1) bits.
776 */
777 bitmap = (u_int8_t *) buffer + offset;
778 temp = num_map_nodes + 1; /* +1 for the header node */
779 while (temp >= 8) {
780 *(bitmap++) = 0xFF;
781 temp -= 8;
782 }
783 *bitmap = ~(0xFF >> temp);
784
785 offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
786 - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
787 index[(nodesize / 2) - 4] = offset;
788
789 if (hfsmp->jnl) {
790 result = btree_journal_modify_block_end(hfsmp, bp);
791 } else {
792 result = VNOP_BWRITE(bp);
793 }
794 if (result)
795 goto exit;
796
797 /* Create the map nodes: node numbers 1 .. num_map_nodes */
798 for (node_num=1; node_num <= num_map_nodes; ++node_num) {
799 bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META);
800 if (bp == NULL) {
801 result = EIO;
802 goto exit;
803 }
804 buffer = (void *)buf_dataptr(bp);
805 blkdesc.buffer = buffer;
806 blkdesc.blockHeader = (void *)bp;
807 blkdesc.blockReadFromDisk = 0;
808 blkdesc.isModified = 0;
809
810 ModifyBlockStart(vp, &blkdesc);
811
812 bzero(buffer, nodesize);
813 index = (u_int16_t *)buffer;
814
815 /* Fill in the node descriptor */
816 ndp = (BTNodeDescriptor *)buffer;
817 if (node_num != num_map_nodes)
818 ndp->fLink = node_num + 1;
819 ndp->kind = kBTMapNode;
820 ndp->numRecords = 1;
821 offset = sizeof(BTNodeDescriptor);
822 index[(nodesize / 2) - 1] = offset;
823
824
825 /* Fill in the map record's offset */
826 /* Note: We assume that the map record is all zeroes */
827 offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
828 index[(nodesize / 2) - 2] = offset;
829
830 if (hfsmp->jnl) {
831 result = btree_journal_modify_block_end(hfsmp, bp);
832 } else {
833 result = VNOP_BWRITE(bp);
834 }
835 if (result)
836 goto exit;
837 }
838
839 /* Update vp/cp for attribute btree */
840 lck_mtx_lock(&hfsmp->hfs_mutex);
841 hfsmp->hfs_attribute_cp = VTOC(vp);
842 hfsmp->hfs_attribute_vp = vp;
843 lck_mtx_unlock(&hfsmp->hfs_mutex);
844
845 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
846 exit:
847 if (vp) {
848 hfs_unlock(VTOC(vp));
849 }
850 if (result) {
851 if (btcb) {
852 FREE (btcb, M_TEMP);
853 }
854 if (vp) {
855 vnode_put(vp);
856 }
857 /* XXX need to give back blocks ? */
858 }
859 if (intrans) {
860 hfs_end_transaction(hfsmp);
861 }
862
863 /*
864 * All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
865 */
866 lck_mtx_lock(&hfsmp->hfs_mutex);
867 hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
868 wakeup((caddr_t)hfsmp->hfs_attribute_cp);
869 lck_mtx_unlock(&hfsmp->hfs_mutex);
870
871 return (result);
872 }
873