]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_btreeio.c
xnu-2050.48.11.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/buf.h>
32 #include <sys/buf_internal.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/vnode.h>
37
38
39 #include "hfs.h"
40 #include "hfs_cnode.h"
41 #include "hfs_dbg.h"
42 #include "hfs_endian.h"
43 #include "hfs_btreeio.h"
44
45 #include "hfscommon/headers/FileMgrInternal.h"
46 #include "hfscommon/headers/BTreesPrivate.h"
47
48 #define FORCESYNCBTREEWRITES 0
49
50 /* From bsd/vfs/vfs_bio.c */
51 extern int bdwrite_internal(struct buf *, int);
52
53 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
54 static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp);
55
56 void btree_swap_node(struct buf *bp, __unused void *arg);
57
58 /*
59 * Return btree node size for given vnode.
60 *
61 * Returns:
62 * For btree vnode, returns btree node size.
63 * For non-btree vnodes, returns 0.
64 */
65 u_int16_t get_btree_nodesize(struct vnode *vp)
66 {
67 BTreeControlBlockPtr btree;
68 u_int16_t node_size = 0;
69
70 if (vnode_issystem(vp)) {
71 btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr;
72 if (btree) {
73 node_size = btree->nodeSize;
74 }
75 }
76
77 return node_size;
78 }
79
80 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
81 {
82 BTreeControlBlockPtr bTreePtr;
83
84 DBG_ASSERT(vp != NULL);
85 DBG_ASSERT(blockSize >= kMinNodeSize);
86 if (blockSize > MAXBSIZE )
87 return (fsBTBadNodeSize);
88
89 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
90 bTreePtr->nodeSize = blockSize;
91
92 return (E_NONE);
93 }
94
95
96 OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
97 {
98 OSStatus retval = E_NONE;
99 struct buf *bp = NULL;
100 u_int8_t allow_empty_node;
101
102 /* If the btree block is being read using hint, it is
103 * fine for the swap code to find zeroed out nodes.
104 */
105 if (options & kGetBlockHint) {
106 allow_empty_node = true;
107 } else {
108 allow_empty_node = false;
109 }
110
111 if (options & kGetEmptyBlock) {
112 daddr64_t blkno;
113 off_t offset;
114
115 offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize;
116 bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META);
117 if (bp &&
118 VNOP_BLOCKMAP(vp, offset, block->blockSize, &blkno, NULL, NULL, 0, NULL) == 0) {
119 buf_setblkno(bp, blkno);
120 }
121 } else {
122 retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp);
123 }
124 if (bp == NULL)
125 retval = -1; //XXX need better error
126
127 if (retval == E_NONE) {
128 block->blockHeader = bp;
129 block->buffer = (char *)buf_dataptr(bp);
130 block->blockNum = buf_lblkno(bp);
131 block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */
132
133 // XXXdbg
134 block->isModified = 0;
135
136 /* Check and endian swap B-Tree node (only if it's a valid block) */
137 if (!(options & kGetEmptyBlock)) {
138 /* This happens when we first open the b-tree, we might not have all the node data on hand */
139 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
140 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
141 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
142
143 /*
144 * Don't swap the node descriptor, record offsets, or other records.
145 * This record will be invalidated and re-read with the correct node
146 * size once the B-tree control block is set up with the node size
147 * from the header record.
148 */
149 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
150
151 } else if (block->blockReadFromDisk) {
152 /*
153 * The node was just read from disk, so always swap/check it.
154 * This is necessary on big endian since the test below won't trigger.
155 */
156 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
157 } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
158 /*
159 * The node was left in the cache in non-native order, so swap it.
160 * This only happens on little endian, after the node is written
161 * back to disk.
162 */
163 retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
164 }
165
166 /*
167 * If we got an error, then the node is only partially swapped.
168 * We mark the buffer invalid so that the next attempt to get the
169 * node will read it and attempt to swap again, and will notice
170 * the error again. If we didn't do this, the next attempt to get
171 * the node might use the partially swapped node as-is.
172 */
173 if (retval)
174 buf_markinvalid(bp);
175 }
176 }
177
178 if (retval) {
179 if (bp)
180 buf_brelse(bp);
181 block->blockHeader = NULL;
182 block->buffer = NULL;
183 }
184
185 return (retval);
186 }
187
188
189 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
190 {
191 struct hfsmount *hfsmp = VTOHFS(vp);
192 struct buf *bp = NULL;
193
194 if (hfsmp->jnl == NULL) {
195 return;
196 }
197
198 bp = (struct buf *) blockPtr->blockHeader;
199 if (bp == NULL) {
200 panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr);
201 return;
202 }
203
204 journal_modify_block_start(hfsmp->jnl, bp);
205 blockPtr->isModified = 1;
206 }
207
208 void
209 btree_swap_node(struct buf *bp, __unused void *arg)
210 {
211 // struct hfsmount *hfsmp = (struct hfsmount *)arg;
212 int retval;
213 struct vnode *vp = buf_vnode(bp);
214 BlockDescriptor block;
215
216 /* Prepare the block pointer */
217 block.blockHeader = bp;
218 block.buffer = (char *)buf_dataptr(bp);
219 block.blockNum = buf_lblkno(bp);
220 /* not found in cache ==> came from disk */
221 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
222 block.blockSize = buf_count(bp);
223
224 /* Swap the data now that this node is ready to go to disk.
225 * We allow swapping of zeroed out nodes here because we might
226 * be writing node whose last record just got deleted.
227 */
228 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
229 if (retval)
230 panic("hfs: btree_swap_node: about to write corrupt node!\n");
231 }
232
233
234 static int
235 btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
236 {
237 return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp);
238 }
239
240
241 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
242 {
243 struct hfsmount *hfsmp = VTOHFS(vp);
244 OSStatus retval = E_NONE;
245 struct buf *bp = NULL;
246
247 bp = (struct buf *) blockPtr->blockHeader;
248
249 if (bp == NULL) {
250 retval = -1;
251 goto exit;
252 }
253
254 if (options & kTrashBlock) {
255 buf_markinvalid(bp);
256
257 if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) {
258 journal_kill_block(hfsmp->jnl, bp);
259 } else {
260 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
261 }
262
263 /* Don't let anyone else try to use this bp, it's been consumed */
264 blockPtr->blockHeader = NULL;
265
266 } else {
267 if (options & kForceWriteBlock) {
268 if (hfsmp->jnl) {
269 if (blockPtr->isModified == 0) {
270 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp);
271 }
272
273 retval = btree_journal_modify_block_end(hfsmp, bp);
274 blockPtr->isModified = 0;
275 } else {
276 retval = VNOP_BWRITE(bp);
277 }
278
279 /* Don't let anyone else try to use this bp, it's been consumed */
280 blockPtr->blockHeader = NULL;
281
282 } else if (options & kMarkBlockDirty) {
283 struct timeval tv;
284 microuptime(&tv);
285 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
286 /*
287 *
288 * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move
289 * the buffer onto the LOCKED free list. This is necessary, otherwise
290 * getnewbuf() would try to reclaim the buffers using buf_bawrite, which
291 * isn't going to work.
292 *
293 */
294 /* Don't hog all the buffers... */
295 if (count_lock_queue() > kMaxLockedMetaBuffers) {
296 hfs_btsync(vp, HFS_SYNCTRANS);
297 /* Rollback sync time to cause a sync on lock release... */
298 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
299 }
300 buf_setflags(bp, B_LOCKED);
301 }
302
303 /*
304 * Delay-write this block.
305 * If the maximum delayed buffers has been exceeded then
306 * free up some buffers and fall back to an asynchronous write.
307 */
308 if (hfsmp->jnl) {
309 if (blockPtr->isModified == 0) {
310 panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp);
311 }
312 retval = btree_journal_modify_block_end(hfsmp, bp);
313 blockPtr->isModified = 0;
314 } else if (bdwrite_internal(bp, 1) != 0) {
315 hfs_btsync(vp, 0);
316 /* Rollback sync time to cause a sync on lock release... */
317 (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1));
318
319 buf_clearflags(bp, B_LOCKED);
320 buf_bawrite(bp);
321 }
322
323 /* Don't let anyone else try to use this bp, it's been consumed */
324 blockPtr->blockHeader = NULL;
325
326 } else {
327 // check if we had previously called journal_modify_block_start()
328 // on this block and if so, abort it (which will call buf_brelse()).
329 if (hfsmp->jnl && blockPtr->isModified) {
330 // XXXdbg - I don't want to call modify_block_abort()
331 // because I think it may be screwing up the
332 // journal and blowing away a block that has
333 // valid data in it.
334 //
335 // journal_modify_block_abort(hfsmp->jnl, bp);
336 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
337 btree_journal_modify_block_end(hfsmp, bp);
338 blockPtr->isModified = 0;
339 } else {
340 buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
341 }
342
343 /* Don't let anyone else try to use this bp, it's been consumed */
344 blockPtr->blockHeader = NULL;
345 }
346 }
347
348 exit:
349 return (retval);
350 }
351
352
353 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
354 {
355 #pragma unused (maxEOF)
356
357 OSStatus retval = 0, ret = 0;
358 int64_t actualBytesAdded, origSize;
359 u_int64_t bytesToAdd;
360 u_int32_t startAllocation;
361 u_int32_t fileblocks;
362 BTreeInfoRec btInfo;
363 ExtendedVCB *vcb;
364 FCB *filePtr;
365 struct proc *p = NULL;
366 int64_t trim = 0;
367 int lockflags = 0;
368
369 filePtr = GetFileControlBlock(vp);
370
371 if ( (off_t)minEOF > filePtr->fcbEOF )
372 {
373 bytesToAdd = minEOF - filePtr->fcbEOF;
374
375 if (bytesToAdd < filePtr->ff_clumpsize)
376 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
377 }
378 else
379 {
380 return -1;
381 }
382
383 vcb = VTOVCB(vp);
384
385 /*
386 * The Extents B-tree can't have overflow extents. ExtendFileC will
387 * return an error if an attempt is made to extend the Extents B-tree
388 * when the resident extents are exhausted.
389 */
390
391 /* Protect allocation bitmap and extents overflow file. */
392 lockflags = SFL_BITMAP;
393 if (VTOC(vp)->c_fileid != kHFSExtentsFileID)
394 lockflags |= SFL_EXTENTS;
395 lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK);
396
397 (void) BTGetInformation(filePtr, 0, &btInfo);
398
399 #if 0 // XXXdbg
400 /*
401 * The b-tree code expects nodes to be contiguous. So when
402 * the allocation block size is less than the b-tree node
403 * size, we need to force disk allocations to be contiguous.
404 */
405 if (vcb->blockSize >= btInfo.nodeSize) {
406 extendFlags = 0;
407 } else {
408 /* Ensure that all b-tree nodes are contiguous on disk */
409 extendFlags = kEFContigMask;
410 }
411 #endif
412
413 origSize = filePtr->fcbEOF;
414 fileblocks = filePtr->ff_blocks;
415 startAllocation = vcb->nextAllocation;
416
417 // loop trying to get a contiguous chunk that's an integer multiple
418 // of the btree node size. if we can't get a contiguous chunk that
419 // is at least the node size then we break out of the loop and let
420 // the error propagate back up.
421 while((off_t)bytesToAdd >= btInfo.nodeSize) {
422 do {
423 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
424 kEFContigMask | kEFMetadataMask | kEFNoClumpMask,
425 (int64_t *)&actualBytesAdded);
426 if (retval == dskFulErr && actualBytesAdded == 0) {
427 bytesToAdd >>= 1;
428 if (bytesToAdd < btInfo.nodeSize) {
429 break;
430 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
431 // make sure it's an integer multiple of the nodeSize
432 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
433 }
434 }
435 } while (retval == dskFulErr && actualBytesAdded == 0);
436
437 if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) {
438 break;
439 }
440
441 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
442 bytesToAdd = minEOF - filePtr->fcbEOF;
443 }
444
445 /*
446 * If a new extent was added then move the roving allocator
447 * reference forward by the current b-tree file size so
448 * there's plenty of room to grow.
449 */
450 if ((retval == 0) &&
451 ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
452 (vcb->nextAllocation > startAllocation) &&
453 ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) {
454 HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks);
455 }
456
457 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
458
459 // XXXdbg ExtendFileC() could have returned an error even though
460 // it grew the file to be big enough for our needs. If this is
461 // the case, we don't care about retval so we blow it away.
462 //
463 if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) {
464 retval = 0;
465 }
466
467 // XXXdbg if the file grew but isn't large enough or isn't an
468 // even multiple of the nodeSize then trim things back. if
469 // the file isn't large enough we trim back to the original
470 // size. otherwise we trim back to be an even multiple of the
471 // btree node size.
472 //
473 if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) {
474
475 if (filePtr->fcbEOF < (off_t)minEOF) {
476 retval = dskFulErr;
477
478 if (filePtr->fcbEOF < origSize) {
479 panic("hfs: btree file eof %lld less than orig size %lld!\n",
480 filePtr->fcbEOF, origSize);
481 }
482
483 trim = filePtr->fcbEOF - origSize;
484 } else {
485 trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
486 }
487
488 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
489 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
490
491 // XXXdbg - panic if the file didn't get trimmed back properly
492 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
493 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n",
494 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
495 }
496
497 if (ret) {
498 // XXXdbg - this probably doesn't need to be a panic()
499 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n",
500 filePtr->fcbEOF, trim, (long)ret);
501 goto out;
502 }
503 }
504
505 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
506 /*
507 * Get any extents overflow b-tree changes to disk ASAP!
508 */
509 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
510 (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p);
511 }
512 hfs_systemfile_unlock(vcb, lockflags);
513 lockflags = 0;
514
515 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
516 panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
517 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
518 }
519
520 /*
521 * Update the Alternate MDB or Alternate VolumeHeader
522 */
523 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
524 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
525 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
526 ) {
527 VTOC(vp)->c_flag |= C_MODIFIED;
528 MarkVCBDirty( vcb );
529 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
530 } else {
531 VTOC(vp)->c_touch_chgtime = TRUE;
532 VTOC(vp)->c_touch_modtime = TRUE;
533 (void) hfs_update(vp, TRUE);
534 }
535
536 ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize));
537 out:
538 if (retval == 0)
539 retval = ret;
540
541 if (lockflags)
542 hfs_systemfile_unlock(vcb, lockflags);
543
544 return retval;
545 }
546
547
548 /*
549 * Clear out (zero) new b-tree nodes on disk.
550 */
551 static int
552 ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
553 {
554 struct hfsmount *hfsmp = VTOHFS(vp);
555 struct buf *bp = NULL;
556 daddr64_t blk;
557 daddr64_t blkcnt;
558
559 blk = offset / blksize;
560 blkcnt = amount / blksize;
561
562 while (blkcnt > 0) {
563 bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META);
564 if (bp == NULL)
565 continue;
566
567 // XXXdbg
568 if (hfsmp->jnl) {
569 // XXXdbg -- skipping this for now since it makes a transaction
570 // become *way* too large
571 //journal_modify_block_start(hfsmp->jnl, bp);
572 }
573 bzero((char *)buf_dataptr(bp), blksize);
574
575 buf_markaged(bp);
576
577 // XXXdbg
578 if (hfsmp->jnl) {
579 // XXXdbg -- skipping this for now since it makes a transaction
580 // become *way* too large
581 //journal_modify_block_end(hfsmp->jnl, bp);
582
583 // XXXdbg - remove this once we decide what to do with the
584 // writes to the journal
585 if ((blk % 32) == 0)
586 VNOP_BWRITE(bp);
587 else
588 buf_bawrite(bp);
589 } else {
590 /* wait/yield every 32 blocks so we don't hog all the buffers */
591 if ((blk % 32) == 0)
592 VNOP_BWRITE(bp);
593 else
594 buf_bawrite(bp);
595 }
596 --blkcnt;
597 ++blk;
598 }
599
600 return (0);
601 }
602
603
604 extern char hfs_attrname[];
605
606 /*
607 * Create an HFS+ Attribute B-tree File.
608 *
609 * No global resources should be held.
610 */
611 int
612 hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt)
613 {
614 struct vnode* vp = NULLVP;
615 struct cat_desc cndesc;
616 struct cat_attr cnattr;
617 struct cat_fork cfork;
618 BlockDescriptor blkdesc;
619 BTNodeDescriptor *ndp;
620 BTHeaderRec *bthp;
621 BTreeControlBlockPtr btcb = NULL;
622 struct buf *bp = NULL;
623 void * buffer;
624 u_int8_t *bitmap;
625 u_int16_t *index;
626 u_int32_t node_num, num_map_nodes;
627 u_int32_t bytes_per_map_record;
628 u_int32_t temp;
629 u_int16_t offset;
630 int intrans = 0;
631 int result;
632 int newvnode_flags = 0;
633
634 again:
635 /*
636 * Serialize creation using HFS_CREATING_BTREE flag.
637 */
638 lck_mtx_lock(&hfsmp->hfs_mutex);
639 if (hfsmp->hfs_flags & HFS_CREATING_BTREE) {
640 /* Someone else beat us, wait for them to finish. */
641 (void) msleep(hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex,
642 PDROP | PINOD, "hfs_create_attr_btree", 0);
643 if (hfsmp->hfs_attribute_vp) {
644 return (0);
645 }
646 goto again;
647 }
648 hfsmp->hfs_flags |= HFS_CREATING_BTREE;
649 lck_mtx_unlock(&hfsmp->hfs_mutex);
650
651 /* Check if were out of usable disk space. */
652 if ((hfs_freeblks(hfsmp, 1) == 0)) {
653 result = ENOSPC;
654 goto exit;
655 }
656
657 /*
658 * Set up Attribute B-tree vnode
659 * (this must be done before we start a transaction
660 * or take any system file locks)
661 */
662 bzero(&cndesc, sizeof(cndesc));
663 cndesc.cd_parentcnid = kHFSRootParentID;
664 cndesc.cd_flags |= CD_ISMETA;
665 cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname;
666 cndesc.cd_namelen = strlen(hfs_attrname);
667 cndesc.cd_cnid = kHFSAttributesFileID;
668
669 bzero(&cnattr, sizeof(cnattr));
670 cnattr.ca_linkcount = 1;
671 cnattr.ca_mode = S_IFREG;
672 cnattr.ca_fileid = cndesc.cd_cnid;
673
674 bzero(&cfork, sizeof(cfork));
675 cfork.cf_clump = nodesize * nodecnt;
676
677 result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
678 &cfork, &vp, &newvnode_flags);
679 if (result) {
680 goto exit;
681 }
682 /*
683 * Set up Attribute B-tree control block
684 */
685 MALLOC(btcb, BTreeControlBlock *, sizeof(BTreeControlBlock), M_TEMP, M_WAITOK);
686 bzero(btcb, sizeof(BTreeControlBlock));
687
688 btcb->nodeSize = nodesize;
689 btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
690 btcb->btreeType = 0xFF;
691 btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
692 btcb->version = kBTreeVersion;
693 btcb->writeCount = 1;
694 btcb->flags = 0; /* kBTHeaderDirty */
695 btcb->fileRefNum = vp;
696 btcb->getBlockProc = GetBTreeBlock;
697 btcb->releaseBlockProc = ReleaseBTreeBlock;
698 btcb->setEndOfForkProc = ExtendBTreeFile;
699 btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare;
700 VTOF(vp)->fcbBTCBPtr = btcb;
701
702 /*
703 * Allocate some space
704 */
705 if (hfs_start_transaction(hfsmp) != 0) {
706 result = EINVAL;
707 goto exit;
708 }
709 intrans = 1;
710
711 /* Note ExtendBTreeFile will acquire the necessary system file locks. */
712 result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump);
713 if (result)
714 goto exit;
715
716 btcb->totalNodes = VTOF(vp)->ff_size / nodesize;
717
718 /*
719 * Figure out how many map nodes we'll need.
720 *
721 * bytes_per_map_record = the number of bytes in the map record of a
722 * map node. Since that is the only record in the node, it is the size
723 * of the node minus the node descriptor at the start, and two record
724 * offsets at the end of the node. The "- 2" is to round the size down
725 * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a
726 * multiple of 4).
727 *
728 * The value "temp" here is the number of *bits* in the map record of
729 * the header node.
730 */
731 bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2;
732 temp = 8 * (nodesize - sizeof(BTNodeDescriptor)
733 - sizeof(BTHeaderRec)
734 - kBTreeHeaderUserBytes
735 - 4 * sizeof(u_int16_t));
736 if (btcb->totalNodes > temp) {
737 num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8);
738 }
739 else {
740 num_map_nodes = 0;
741 }
742
743 btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes;
744
745 /*
746 * Initialize the b-tree header on disk
747 */
748 bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META);
749 if (bp == NULL) {
750 result = EIO;
751 goto exit;
752 }
753
754 buffer = (void *)buf_dataptr(bp);
755 blkdesc.buffer = buffer;
756 blkdesc.blockHeader = (void *)bp;
757 blkdesc.blockReadFromDisk = 0;
758 blkdesc.isModified = 0;
759
760 ModifyBlockStart(vp, &blkdesc);
761
762 if (buf_size(bp) != nodesize)
763 panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp));
764
765 bzero(buffer, nodesize);
766 index = (u_int16_t *)buffer;
767
768 /* FILL IN THE NODE DESCRIPTOR: */
769 ndp = (BTNodeDescriptor *)buffer;
770 if (num_map_nodes != 0)
771 ndp->fLink = 1;
772 ndp->kind = kBTHeaderNode;
773 ndp->numRecords = 3;
774 offset = sizeof(BTNodeDescriptor);
775 index[(nodesize / 2) - 1] = offset;
776
777 /* FILL IN THE HEADER RECORD: */
778 bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
779 bthp->nodeSize = nodesize;
780 bthp->totalNodes = btcb->totalNodes;
781 bthp->freeNodes = btcb->freeNodes;
782 bthp->clumpSize = cfork.cf_clump;
783 bthp->btreeType = 0xFF;
784 bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask;
785 bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength;
786 bthp->keyCompareType = kHFSBinaryCompare;
787 offset += sizeof(BTHeaderRec);
788 index[(nodesize / 2) - 2] = offset;
789
790 /* FILL IN THE USER RECORD: */
791 offset += kBTreeHeaderUserBytes;
792 index[(nodesize / 2) - 3] = offset;
793
794 /* Mark the header node and map nodes in use in the map record.
795 *
796 * NOTE: Assumes that the header node's map record has at least
797 * (num_map_nodes + 1) bits.
798 */
799 bitmap = (u_int8_t *) buffer + offset;
800 temp = num_map_nodes + 1; /* +1 for the header node */
801 while (temp >= 8) {
802 *(bitmap++) = 0xFF;
803 temp -= 8;
804 }
805 *bitmap = ~(0xFF >> temp);
806
807 offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
808 - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
809 index[(nodesize / 2) - 4] = offset;
810
811 if (hfsmp->jnl) {
812 result = btree_journal_modify_block_end(hfsmp, bp);
813 } else {
814 result = VNOP_BWRITE(bp);
815 }
816 if (result)
817 goto exit;
818
819 /* Create the map nodes: node numbers 1 .. num_map_nodes */
820 for (node_num=1; node_num <= num_map_nodes; ++node_num) {
821 bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META);
822 if (bp == NULL) {
823 result = EIO;
824 goto exit;
825 }
826 buffer = (void *)buf_dataptr(bp);
827 blkdesc.buffer = buffer;
828 blkdesc.blockHeader = (void *)bp;
829 blkdesc.blockReadFromDisk = 0;
830 blkdesc.isModified = 0;
831
832 ModifyBlockStart(vp, &blkdesc);
833
834 bzero(buffer, nodesize);
835 index = (u_int16_t *)buffer;
836
837 /* Fill in the node descriptor */
838 ndp = (BTNodeDescriptor *)buffer;
839 if (node_num != num_map_nodes)
840 ndp->fLink = node_num + 1;
841 ndp->kind = kBTMapNode;
842 ndp->numRecords = 1;
843 offset = sizeof(BTNodeDescriptor);
844 index[(nodesize / 2) - 1] = offset;
845
846
847 /* Fill in the map record's offset */
848 /* Note: We assume that the map record is all zeroes */
849 offset = sizeof(BTNodeDescriptor) + bytes_per_map_record;
850 index[(nodesize / 2) - 2] = offset;
851
852 if (hfsmp->jnl) {
853 result = btree_journal_modify_block_end(hfsmp, bp);
854 } else {
855 result = VNOP_BWRITE(bp);
856 }
857 if (result)
858 goto exit;
859 }
860
861 /* Update vp/cp for attribute btree */
862 lck_mtx_lock(&hfsmp->hfs_mutex);
863 hfsmp->hfs_attribute_cp = VTOC(vp);
864 hfsmp->hfs_attribute_vp = vp;
865 lck_mtx_unlock(&hfsmp->hfs_mutex);
866
867 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
868
869 if (intrans) {
870 hfs_end_transaction(hfsmp);
871 intrans = 0;
872 }
873
874 /* Initialize the vnode for virtual attribute data file */
875 result = init_attrdata_vnode(hfsmp);
876 if (result) {
877 printf("hfs_create_attr_btree: init_attrdata_vnode() error=%d\n", result);
878 }
879
880 exit:
881 if (vp) {
882 hfs_unlock(VTOC(vp));
883 }
884 if (result) {
885 if (btcb) {
886 FREE (btcb, M_TEMP);
887 }
888 if (vp) {
889 vnode_put(vp);
890 }
891 /* XXX need to give back blocks ? */
892 }
893 if (intrans) {
894 hfs_end_transaction(hfsmp);
895 }
896
897 /*
898 * All done, clear HFS_CREATING_BTREE, and wake up any sleepers.
899 */
900 lck_mtx_lock(&hfsmp->hfs_mutex);
901 hfsmp->hfs_flags &= ~HFS_CREATING_BTREE;
902 wakeup((caddr_t)hfsmp->hfs_attribute_cp);
903 lck_mtx_unlock(&hfsmp->hfs_mutex);
904
905 return (result);
906 }
907