]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_btreeio.c
xnu-517.3.7.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/buf.h>
29 #include <sys/kernel.h>
30 #include <sys/mount.h>
31 #include <sys/vnode.h>
32
33
34 #include "hfs.h"
35 #include "hfs_cnode.h"
36 #include "hfs_dbg.h"
37 #include "hfs_endian.h"
38
39 #include "hfscommon/headers/FileMgrInternal.h"
40 #include "hfscommon/headers/BTreesPrivate.h"
41
42 #define FORCESYNCBTREEWRITES 0
43
44
45 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
46
47
48 __private_extern__
49 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount)
50 {
51 BTreeControlBlockPtr bTreePtr;
52
53 DBG_ASSERT(vp != NULL);
54 DBG_ASSERT(blockSize >= kMinNodeSize);
55 if (blockSize > MAXBSIZE )
56 return (fsBTBadNodeSize);
57
58 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
59 bTreePtr->nodeSize = blockSize;
60
61 return (E_NONE);
62 }
63
64
65 __private_extern__
66 OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block)
67 {
68 OSStatus retval = E_NONE;
69 struct buf *bp = NULL;
70
71 if (options & kGetEmptyBlock)
72 bp = getblk(vp, blockNum, block->blockSize, 0, 0, BLK_META);
73 else
74 retval = meta_bread(vp, blockNum, block->blockSize, NOCRED, &bp);
75
76 DBG_ASSERT(bp != NULL);
77 DBG_ASSERT(bp->b_data != NULL);
78 DBG_ASSERT(bp->b_bcount == block->blockSize);
79 DBG_ASSERT(bp->b_lblkno == blockNum);
80
81 if (bp == NULL)
82 retval = -1; //XXX need better error
83
84 if (retval == E_NONE) {
85 block->blockHeader = bp;
86 block->buffer = bp->b_data;
87 block->blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; /* not found in cache ==> came from disk */
88
89 // XXXdbg
90 block->isModified = 0;
91
92 #if BYTE_ORDER == LITTLE_ENDIAN
93 /* Endian swap B-Tree node (only if it's a valid block) */
94 if (!(options & kGetEmptyBlock)) {
95 /* This happens when we first open the b-tree, we might not have all the node data on hand */
96 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
97 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != bp->b_bcount) &&
98 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != bp->b_bcount)) {
99
100 /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */
101 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3);
102
103 /* The node needs swapping */
104 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) {
105 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 0);
106 #if 0
107 /* The node is not already in native byte order, hence corrupt */
108 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) != 0x000e) {
109 panic ("%s Corrupt B-Tree node detected!\n", "GetBTreeBlock:");
110 #endif
111 }
112 }
113 #endif
114 } else {
115 if (bp)
116 brelse(bp);
117 block->blockHeader = NULL;
118 block->buffer = NULL;
119 }
120
121 return (retval);
122 }
123
124
125 __private_extern__
126 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
127 {
128 struct hfsmount *hfsmp = VTOHFS(vp);
129 struct buf *bp = NULL;
130
131 if (hfsmp->jnl == NULL) {
132 return;
133 }
134
135 bp = (struct buf *) blockPtr->blockHeader;
136 if (bp == NULL) {
137 panic("ModifyBlockStart: null bp for blockdescptr 0x%x?!?\n", blockPtr);
138 return;
139 }
140
141 journal_modify_block_start(hfsmp->jnl, bp);
142 blockPtr->isModified = 1;
143 }
144
145 static int
146 btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
147 {
148 #if BYTE_ORDER == LITTLE_ENDIAN
149 struct vnode *vp = bp->b_vp;
150 BlockDescriptor block;
151
152 /* Prepare the block pointer */
153 block.blockHeader = bp;
154 block.buffer = bp->b_data;
155 /* not found in cache ==> came from disk */
156 block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
157 block.blockSize = bp->b_bcount;
158
159 // XXXdbg have to swap the data before it goes in the journal
160 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
161 #endif
162
163 return journal_modify_block_end(hfsmp->jnl, bp);
164 }
165
166
167 __private_extern__
168 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
169 {
170 struct hfsmount *hfsmp = VTOHFS(vp);
171 extern int bdwrite_internal(struct buf *, int);
172 OSStatus retval = E_NONE;
173 struct buf *bp = NULL;
174
175 bp = (struct buf *) blockPtr->blockHeader;
176
177 if (bp == NULL) {
178 retval = -1;
179 goto exit;
180 }
181
182 if (options & kTrashBlock) {
183 bp->b_flags |= B_INVAL;
184 if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) {
185 journal_kill_block(hfsmp->jnl, bp);
186 } else {
187 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
188 }
189 } else {
190 if (options & kForceWriteBlock) {
191 if (hfsmp->jnl) {
192 if (blockPtr->isModified == 0) {
193 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp 0x%x\n", bp);
194 }
195
196 retval = btree_journal_modify_block_end(hfsmp, bp);
197 blockPtr->isModified = 0;
198 } else {
199 retval = VOP_BWRITE(bp);
200 }
201 } else if (options & kMarkBlockDirty) {
202 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
203 /*
204 *
205 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
206 * the buffer onto the LOCKED free list. This is necessary, otherwise
207 * getnewbuf() would try to reclaim the buffers using bawrite, which
208 * isn't going to work.
209 *
210 */
211 extern int count_lock_queue __P((void));
212 /* Don't hog all the buffers... */
213 if (count_lock_queue() > kMaxLockedMetaBuffers) {
214 hfs_btsync(vp, HFS_SYNCTRANS);
215 /* Rollback sync time to cause a sync on lock release... */
216 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
217 }
218
219 bp->b_flags |= B_LOCKED;
220 }
221
222 /*
223 * Delay-write this block.
224 * If the maximum delayed buffers has been exceeded then
225 * free up some buffers and fall back to an asynchronous write.
226 */
227 if (hfsmp->jnl) {
228 if (blockPtr->isModified == 0) {
229 panic("hfs: releaseblock: modified is 0 but markdirty set! bp 0x%x\n", bp);
230 }
231 retval = btree_journal_modify_block_end(hfsmp, bp);
232 blockPtr->isModified = 0;
233 } else if (bdwrite_internal(bp, 1) != 0) {
234 hfs_btsync(vp, 0);
235 /* Rollback sync time to cause a sync on lock release... */
236 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
237 bp->b_flags &= ~B_LOCKED;
238 bawrite(bp);
239 }
240 } else {
241 // check if we had previously called journal_modify_block_start()
242 // on this block and if so, abort it (which will call brelse()).
243 if (hfsmp->jnl && blockPtr->isModified) {
244 // XXXdbg - I don't want to call modify_block_abort()
245 // because I think it may be screwing up the
246 // journal and blowing away a block that has
247 // valid data in it.
248 //
249 // journal_modify_block_abort(hfsmp->jnl, bp);
250 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
251 btree_journal_modify_block_end(hfsmp, bp);
252 blockPtr->isModified = 0;
253 } else {
254 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
255 }
256 };
257 };
258
259 exit:
260 return (retval);
261 }
262
263
264 __private_extern__
265 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
266 {
267 #pragma unused (maxEOF)
268
269 OSStatus retval, ret;
270 UInt64 actualBytesAdded, origSize;
271 UInt64 bytesToAdd;
272 u_int32_t startAllocation;
273 u_int32_t fileblocks;
274 BTreeInfoRec btInfo;
275 ExtendedVCB *vcb;
276 FCB *filePtr;
277 struct proc *p = NULL;
278 UInt64 trim = 0;
279
280 filePtr = GetFileControlBlock(vp);
281
282 if ( minEOF > filePtr->fcbEOF )
283 {
284 bytesToAdd = minEOF - filePtr->fcbEOF;
285
286 if (bytesToAdd < filePtr->ff_clumpsize)
287 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
288 }
289 else
290 {
291 return -1;
292 }
293
294 vcb = VTOVCB(vp);
295
296 /*
297 * The Extents B-tree can't have overflow extents. ExtendFileC will
298 * return an error if an attempt is made to extend the Extents B-tree
299 * when the resident extents are exhausted.
300 */
301 /* XXX warning - this can leave the volume bitmap unprotected during ExtendFileC call */
302 if(VTOC(vp)->c_fileid != kHFSExtentsFileID)
303 {
304 p = current_proc();
305 /* lock extents b-tree (also protects volume bitmap) */
306 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, p);
307 if (retval)
308 return (retval);
309 }
310
311 (void) BTGetInformation(filePtr, 0, &btInfo);
312
313 #if 0 // XXXdbg
314 /*
315 * The b-tree code expects nodes to be contiguous. So when
316 * the allocation block size is less than the b-tree node
317 * size, we need to force disk allocations to be contiguous.
318 */
319 if (vcb->blockSize >= btInfo.nodeSize) {
320 extendFlags = 0;
321 } else {
322 /* Ensure that all b-tree nodes are contiguous on disk */
323 extendFlags = kEFContigMask;
324 }
325 #endif
326
327 origSize = filePtr->fcbEOF;
328 fileblocks = filePtr->ff_blocks;
329 startAllocation = vcb->nextAllocation;
330
331 // loop trying to get a contiguous chunk that's an integer multiple
332 // of the btree node size. if we can't get a contiguous chunk that
333 // is at least the node size then we break out of the loop and let
334 // the error propagate back up.
335 do {
336 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
337 kEFContigMask | kEFMetadataMask,
338 &actualBytesAdded);
339 if (retval == dskFulErr && actualBytesAdded == 0) {
340
341 if (bytesToAdd == btInfo.nodeSize || bytesToAdd < (minEOF - origSize)) {
342 // if we're here there's nothing else to try, we're out
343 // of space so we break and bail out.
344 break;
345 } else {
346 bytesToAdd >>= 1;
347 if (bytesToAdd < btInfo.nodeSize) {
348 bytesToAdd = btInfo.nodeSize;
349 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
350 // make sure it's an integer multiple of the nodeSize
351 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
352 }
353 }
354 }
355 } while (retval == dskFulErr && actualBytesAdded == 0);
356
357 /*
358 * If a new extent was added then move the roving allocator
359 * reference forward by the current b-tree file size so
360 * there's plenty of room to grow.
361 */
362 if ((retval == 0) &&
363 ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
364 (vcb->nextAllocation > startAllocation) &&
365 ((vcb->nextAllocation + fileblocks) < vcb->totalBlocks)) {
366 vcb->nextAllocation += fileblocks;
367 }
368
369 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
370
371 // XXXdbg ExtendFileC() could have returned an error even though
372 // it grew the file to be big enough for our needs. If this is
373 // the case, we don't care about retval so we blow it away.
374 //
375 if (filePtr->fcbEOF >= minEOF && retval != 0) {
376 retval = 0;
377 }
378
379 // XXXdbg if the file grew but isn't large enough or isn't an
380 // even multiple of the nodeSize then trim things back. if
381 // the file isn't large enough we trim back to the original
382 // size. otherwise we trim back to be an even multiple of the
383 // btree node size.
384 //
385 if ((filePtr->fcbEOF < minEOF) || (actualBytesAdded % btInfo.nodeSize) != 0) {
386
387 if (filePtr->fcbEOF < minEOF) {
388 retval = dskFulErr;
389
390 if (filePtr->fcbEOF < origSize) {
391 panic("hfs: btree file eof %lld less than orig size %lld!\n",
392 filePtr->fcbEOF, origSize);
393 }
394
395 trim = filePtr->fcbEOF - origSize;
396 if (trim != actualBytesAdded) {
397 panic("hfs: trim == %lld but actualBytesAdded == %lld\n",
398 trim, actualBytesAdded);
399 }
400 } else {
401 trim = (actualBytesAdded % btInfo.nodeSize);
402 }
403
404 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0);
405 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
406
407 // XXXdbg - panic if the file didn't get trimmed back properly
408 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
409 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb 0x%x\n",
410 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
411 }
412
413 if (ret) {
414 // XXXdbg - this probably doesn't need to be a panic()
415 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n",
416 filePtr->fcbEOF, trim, ret);
417 return ret;
418 }
419 actualBytesAdded -= trim;
420 }
421
422 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
423 /*
424 * Get any extents overflow b-tree changes to disk ASAP!
425 */
426 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
427 (void) VOP_FSYNC(vcb->extentsRefNum, NOCRED, MNT_WAIT, p);
428
429 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
430 }
431
432 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
433 panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
434 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
435 }
436
437 /*
438 * Update the Alternate MDB or Alternate VolumeHeader
439 */
440 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
441 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
442 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
443 ) {
444 MarkVCBDirty( vcb );
445 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
446 } else {
447 struct timeval tv = time;
448
449 VTOC(vp)->c_flag |= C_CHANGE | C_UPDATE;
450 (void) VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
451 }
452
453 ret = ClearBTNodes(vp, btInfo.nodeSize, filePtr->fcbEOF - actualBytesAdded, actualBytesAdded);
454 if (ret)
455 return (ret);
456
457 return retval;
458 }
459
460
461 /*
462 * Clear out (zero) new b-tree nodes on disk.
463 */
464 static int
465 ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
466 {
467 struct hfsmount *hfsmp = VTOHFS(vp);
468 struct buf *bp = NULL;
469 daddr_t blk;
470 daddr_t blkcnt;
471
472 blk = offset / blksize;
473 blkcnt = amount / blksize;
474
475 while (blkcnt > 0) {
476 bp = getblk(vp, blk, blksize, 0, 0, BLK_META);
477 if (bp == NULL)
478 continue;
479
480 // XXXdbg
481 if (hfsmp->jnl) {
482 // XXXdbg -- skipping this for now since it makes a transaction
483 // become *way* too large
484 //journal_modify_block_start(hfsmp->jnl, bp);
485 }
486
487 bzero((char *)bp->b_data, blksize);
488 bp->b_flags |= B_AGE;
489
490 // XXXdbg
491 if (hfsmp->jnl) {
492 // XXXdbg -- skipping this for now since it makes a transaction
493 // become *way* too large
494 //journal_modify_block_end(hfsmp->jnl, bp);
495
496 // XXXdbg - remove this once we decide what to do with the
497 // writes to the journal
498 if ((blk % 32) == 0)
499 VOP_BWRITE(bp);
500 else
501 bawrite(bp);
502 } else {
503 /* wait/yield every 32 blocks so we don't hog all the buffers */
504 if ((blk % 32) == 0)
505 VOP_BWRITE(bp);
506 else
507 bawrite(bp);
508 }
509 --blkcnt;
510 ++blk;
511 }
512
513 return (0);
514 }