]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_btreeio.c
xnu-344.49.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/buf.h>
29 #include <sys/kernel.h>
30 #include <sys/mount.h>
31 #include <sys/vnode.h>
32
33
34 #include "hfs.h"
35 #include "hfs_cnode.h"
36 #include "hfs_dbg.h"
37 #include "hfs_endian.h"
38
39 #include "hfscommon/headers/FileMgrInternal.h"
40 #include "hfscommon/headers/BTreesPrivate.h"
41
42 #define FORCESYNCBTREEWRITES 0
43
44
45 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
46
47
48 __private_extern__
49 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount)
50 {
51 BTreeControlBlockPtr bTreePtr;
52
53 DBG_ASSERT(vp != NULL);
54 DBG_ASSERT(blockSize >= kMinNodeSize);
55 if (blockSize > MAXBSIZE )
56 return (fsBTBadNodeSize);
57
58 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
59 bTreePtr->nodeSize = blockSize;
60
61 return (E_NONE);
62 }
63
64
65 __private_extern__
66 OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block)
67 {
68 OSStatus retval = E_NONE;
69 struct buf *bp = NULL;
70
71 if (options & kGetEmptyBlock)
72 bp = getblk(vp, blockNum, block->blockSize, 0, 0, BLK_META);
73 else
74 retval = meta_bread(vp, blockNum, block->blockSize, NOCRED, &bp);
75
76 DBG_ASSERT(bp != NULL);
77 DBG_ASSERT(bp->b_data != NULL);
78 DBG_ASSERT(bp->b_bcount == block->blockSize);
79 DBG_ASSERT(bp->b_lblkno == blockNum);
80
81 if (bp == NULL)
82 retval = -1; //XXX need better error
83
84 if (retval == E_NONE) {
85 block->blockHeader = bp;
86 block->buffer = bp->b_data;
87 block->blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; /* not found in cache ==> came from disk */
88
89 // XXXdbg
90 block->isModified = 0;
91
92 #if BYTE_ORDER == LITTLE_ENDIAN
93 /* Endian swap B-Tree node (only if it's a valid block) */
94 if (!(options & kGetEmptyBlock)) {
95 /* This happens when we first open the b-tree, we might not have all the node data on hand */
96 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
97 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != bp->b_bcount) &&
98 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != bp->b_bcount)) {
99
100 /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */
101 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3);
102
103 /* The node needs swapping */
104 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) {
105 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 0);
106 #if 0
107 /* The node is not already in native byte order, hence corrupt */
108 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) != 0x000e) {
109 panic ("%s Corrupt B-Tree node detected!\n", "GetBTreeBlock:");
110 #endif
111 }
112 }
113 #endif
114 } else {
115 if (bp)
116 brelse(bp);
117 block->blockHeader = NULL;
118 block->buffer = NULL;
119 }
120
121 return (retval);
122 }
123
124
125 __private_extern__
126 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
127 {
128 struct hfsmount *hfsmp = VTOHFS(vp);
129 struct buf *bp = NULL;
130
131 if (hfsmp->jnl == NULL) {
132 return;
133 }
134
135 bp = (struct buf *) blockPtr->blockHeader;
136 if (bp == NULL) {
137 panic("ModifyBlockStart: null bp for blockdescptr 0x%x?!?\n", blockPtr);
138 return;
139 }
140
141 journal_modify_block_start(hfsmp->jnl, bp);
142 blockPtr->isModified = 1;
143 }
144
145
146 __private_extern__
147 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
148 {
149 struct hfsmount *hfsmp = VTOHFS(vp);
150 extern int bdwrite_internal(struct buf *, int);
151 OSStatus retval = E_NONE;
152 struct buf *bp = NULL;
153
154 bp = (struct buf *) blockPtr->blockHeader;
155
156 if (bp == NULL) {
157 retval = -1;
158 goto exit;
159 }
160
161 if (options & kTrashBlock) {
162 bp->b_flags |= B_INVAL;
163 if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) {
164 journal_kill_block(hfsmp->jnl, bp);
165 } else {
166 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
167 }
168 } else {
169 if (options & kForceWriteBlock) {
170 if (hfsmp->jnl) {
171 if (blockPtr->isModified == 0) {
172 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp 0x%x\n", bp);
173 }
174 retval = journal_modify_block_end(hfsmp->jnl, bp);
175 blockPtr->isModified = 0;
176 } else {
177 retval = VOP_BWRITE(bp);
178 }
179 } else if (options & kMarkBlockDirty) {
180 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
181 /*
182 *
183 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
184 * the buffer onto the LOCKED free list. This is necessary, otherwise
185 * getnewbuf() would try to reclaim the buffers using bawrite, which
186 * isn't going to work.
187 *
188 */
189 extern int count_lock_queue __P((void));
190 /* Don't hog all the buffers... */
191 if (count_lock_queue() > kMaxLockedMetaBuffers) {
192 hfs_btsync(vp, HFS_SYNCTRANS);
193 /* Rollback sync time to cause a sync on lock release... */
194 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
195 }
196
197 bp->b_flags |= B_LOCKED;
198 }
199
200 /*
201 * Delay-write this block.
202 * If the maximum delayed buffers has been exceeded then
203 * free up some buffers and fall back to an asynchronous write.
204 */
205 if (hfsmp->jnl) {
206 if (blockPtr->isModified == 0) {
207 panic("hfs: releaseblock: modified is 0 but markdirty set! bp 0x%x\n", bp);
208 }
209 retval = journal_modify_block_end(hfsmp->jnl, bp);
210 blockPtr->isModified = 0;
211 } else if (bdwrite_internal(bp, 1) != 0) {
212 hfs_btsync(vp, 0);
213 /* Rollback sync time to cause a sync on lock release... */
214 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
215 bp->b_flags &= ~B_LOCKED;
216 bawrite(bp);
217 }
218 } else {
219 // check if we had previously called journal_modify_block_start()
220 // on this block and if so, abort it (which will call brelse()).
221 if (hfsmp->jnl && blockPtr->isModified) {
222 // XXXdbg - I don't want to call modify_block_abort()
223 // because I think it may be screwing up the
224 // journal and blowing away a block that has
225 // valid data in it.
226 //
227 // journal_modify_block_abort(hfsmp->jnl, bp);
228 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
229 journal_modify_block_end(hfsmp->jnl, bp);
230 blockPtr->isModified = 0;
231 } else {
232 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
233 }
234 };
235 };
236
237 exit:
238 return (retval);
239 }
240
241
242 __private_extern__
243 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
244 {
245 #pragma unused (maxEOF)
246
247 OSStatus retval, ret;
248 UInt64 actualBytesAdded, origSize;
249 UInt64 bytesToAdd;
250 u_int32_t startAllocation;
251 u_int32_t fileblocks;
252 BTreeInfoRec btInfo;
253 ExtendedVCB *vcb;
254 FCB *filePtr;
255 struct proc *p = NULL;
256 UInt64 trim = 0;
257
258 filePtr = GetFileControlBlock(vp);
259
260 if ( minEOF > filePtr->fcbEOF )
261 {
262 bytesToAdd = minEOF - filePtr->fcbEOF;
263
264 if (bytesToAdd < filePtr->ff_clumpsize)
265 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
266 }
267 else
268 {
269 return -1;
270 }
271
272 vcb = VTOVCB(vp);
273
274 /*
275 * The Extents B-tree can't have overflow extents. ExtendFileC will
276 * return an error if an attempt is made to extend the Extents B-tree
277 * when the resident extents are exhausted.
278 */
279 /* XXX warning - this can leave the volume bitmap unprotected during ExtendFileC call */
280 if(VTOC(vp)->c_fileid != kHFSExtentsFileID)
281 {
282 p = current_proc();
283 /* lock extents b-tree (also protects volume bitmap) */
284 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, p);
285 if (retval)
286 return (retval);
287 }
288
289 (void) BTGetInformation(filePtr, 0, &btInfo);
290
291 #if 0 // XXXdbg
292 /*
293 * The b-tree code expects nodes to be contiguous. So when
294 * the allocation block size is less than the b-tree node
295 * size, we need to force disk allocations to be contiguous.
296 */
297 if (vcb->blockSize >= btInfo.nodeSize) {
298 extendFlags = 0;
299 } else {
300 /* Ensure that all b-tree nodes are contiguous on disk */
301 extendFlags = kEFContigMask;
302 }
303 #endif
304
305 origSize = filePtr->fcbEOF;
306 fileblocks = filePtr->ff_blocks;
307 startAllocation = vcb->nextAllocation;
308
309 // loop trying to get a contiguous chunk that's an integer multiple
310 // of the btree node size. if we can't get a contiguous chunk that
311 // is at least the node size then we break out of the loop and let
312 // the error propagate back up.
313 do {
314 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0, kEFContigMask, &actualBytesAdded);
315 if (retval == dskFulErr && actualBytesAdded == 0) {
316
317 if (bytesToAdd == btInfo.nodeSize || bytesToAdd < (minEOF - origSize)) {
318 // if we're here there's nothing else to try, we're out
319 // of space so we break and bail out.
320 break;
321 } else {
322 bytesToAdd >>= 1;
323 if (bytesToAdd < btInfo.nodeSize) {
324 bytesToAdd = btInfo.nodeSize;
325 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
326 // make sure it's an integer multiple of the nodeSize
327 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
328 }
329 }
330 }
331 } while (retval == dskFulErr && actualBytesAdded == 0);
332
333 /*
334 * If a new extent was added then move the roving allocator
335 * reference forward by the current b-tree file size so
336 * there's plenty of room to grow.
337 */
338 if ((retval == 0) &&
339 (vcb->nextAllocation > startAllocation) &&
340 ((vcb->nextAllocation + fileblocks) < vcb->totalBlocks)) {
341 vcb->nextAllocation += fileblocks;
342 }
343
344 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
345
346 // XXXdbg ExtendFileC() could have returned an error even though
347 // it grew the file to be big enough for our needs. If this is
348 // the case, we don't care about retval so we blow it away.
349 //
350 if (filePtr->fcbEOF >= minEOF && retval != 0) {
351 retval = 0;
352 }
353
354 // XXXdbg if the file grew but isn't large enough or isn't an
355 // even multiple of the nodeSize then trim things back. if
356 // the file isn't large enough we trim back to the original
357 // size. otherwise we trim back to be an even multiple of the
358 // btree node size.
359 //
360 if ((filePtr->fcbEOF < minEOF) || (actualBytesAdded % btInfo.nodeSize) != 0) {
361
362 if (filePtr->fcbEOF < minEOF) {
363 retval = dskFulErr;
364
365 if (filePtr->fcbEOF < origSize) {
366 panic("hfs: btree file eof %lld less than orig size %lld!\n",
367 filePtr->fcbEOF, origSize);
368 }
369
370 trim = filePtr->fcbEOF - origSize;
371 if (trim != actualBytesAdded) {
372 panic("hfs: trim == %lld but actualBytesAdded == %lld\n",
373 trim, actualBytesAdded);
374 }
375 } else {
376 trim = (actualBytesAdded % btInfo.nodeSize);
377 }
378
379 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0);
380 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
381
382 // XXXdbg - panic if the file didn't get trimmed back properly
383 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
384 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb 0x%x\n",
385 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
386 }
387
388 if (ret) {
389 // XXXdbg - this probably doesn't need to be a panic()
390 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n",
391 filePtr->fcbEOF, trim, ret);
392 return ret;
393 }
394 actualBytesAdded -= trim;
395 }
396
397 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
398 /*
399 * Get any extents overflow b-tree changes to disk ASAP!
400 */
401 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
402 (void) VOP_FSYNC(vcb->extentsRefNum, NOCRED, MNT_WAIT, p);
403
404 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
405 }
406
407 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
408 panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
409 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
410 }
411
412 /*
413 * Update the Alternate MDB or Alternate VolumeHeader
414 */
415 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
416 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
417 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
418 ) {
419 MarkVCBDirty( vcb );
420 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
421 }
422
423 ret = ClearBTNodes(vp, btInfo.nodeSize, filePtr->fcbEOF - actualBytesAdded, actualBytesAdded);
424 if (ret)
425 return (ret);
426
427 return retval;
428 }
429
430
431 /*
432 * Clear out (zero) new b-tree nodes on disk.
433 */
434 static int
435 ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
436 {
437 struct hfsmount *hfsmp = VTOHFS(vp);
438 struct buf *bp = NULL;
439 daddr_t blk;
440 daddr_t blkcnt;
441
442 blk = offset / blksize;
443 blkcnt = amount / blksize;
444
445 while (blkcnt > 0) {
446 bp = getblk(vp, blk, blksize, 0, 0, BLK_META);
447 if (bp == NULL)
448 continue;
449
450 // XXXdbg
451 if (hfsmp->jnl) {
452 // XXXdbg -- skipping this for now since it makes a transaction
453 // become *way* too large
454 //journal_modify_block_start(hfsmp->jnl, bp);
455 }
456
457 bzero((char *)bp->b_data, blksize);
458 bp->b_flags |= B_AGE;
459
460 // XXXdbg
461 if (hfsmp->jnl) {
462 // XXXdbg -- skipping this for now since it makes a transaction
463 // become *way* too large
464 //journal_modify_block_end(hfsmp->jnl, bp);
465
466 // XXXdbg - remove this once we decide what to do with the
467 // writes to the journal
468 if ((blk % 32) == 0)
469 VOP_BWRITE(bp);
470 else
471 bawrite(bp);
472 } else {
473 /* wait/yield every 32 blocks so we don't hog all the buffers */
474 if ((blk % 32) == 0)
475 VOP_BWRITE(bp);
476 else
477 bawrite(bp);
478 }
479 --blkcnt;
480 ++blk;
481 }
482
483 return (0);
484 }