]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_btreeio.c
xnu-344.23.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/buf.h>
26 #include <sys/kernel.h>
27 #include <sys/mount.h>
28 #include <sys/vnode.h>
29
30
31 #include "hfs.h"
32 #include "hfs_cnode.h"
33 #include "hfs_dbg.h"
34 #include "hfs_endian.h"
35
36 #include "hfscommon/headers/FileMgrInternal.h"
37 #include "hfscommon/headers/BTreesPrivate.h"
38
39 #define FORCESYNCBTREEWRITES 0
40
41
42 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
43
44
45 __private_extern__
46 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount)
47 {
48 BTreeControlBlockPtr bTreePtr;
49
50 DBG_ASSERT(vp != NULL);
51 DBG_ASSERT(blockSize >= kMinNodeSize);
52 if (blockSize > MAXBSIZE )
53 return (fsBTBadNodeSize);
54
55 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
56 bTreePtr->nodeSize = blockSize;
57
58 return (E_NONE);
59 }
60
61
62 __private_extern__
63 OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block)
64 {
65 OSStatus retval = E_NONE;
66 struct buf *bp = NULL;
67
68 if (options & kGetEmptyBlock)
69 bp = getblk(vp, blockNum, block->blockSize, 0, 0, BLK_META);
70 else
71 retval = meta_bread(vp, blockNum, block->blockSize, NOCRED, &bp);
72
73 DBG_ASSERT(bp != NULL);
74 DBG_ASSERT(bp->b_data != NULL);
75 DBG_ASSERT(bp->b_bcount == block->blockSize);
76 DBG_ASSERT(bp->b_lblkno == blockNum);
77
78 if (bp == NULL)
79 retval = -1; //XXX need better error
80
81 if (retval == E_NONE) {
82 block->blockHeader = bp;
83 block->buffer = bp->b_data;
84 block->blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; /* not found in cache ==> came from disk */
85
86 // XXXdbg
87 block->isModified = 0;
88
89 #if BYTE_ORDER == LITTLE_ENDIAN
90 /* Endian swap B-Tree node (only if it's a valid block) */
91 if (!(options & kGetEmptyBlock)) {
92 /* This happens when we first open the b-tree, we might not have all the node data on hand */
93 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
94 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != bp->b_bcount) &&
95 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != bp->b_bcount)) {
96
97 /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */
98 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3);
99
100 /* The node needs swapping */
101 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) {
102 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 0);
103 #if 0
104 /* The node is not already in native byte order, hence corrupt */
105 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) != 0x000e) {
106 panic ("%s Corrupt B-Tree node detected!\n", "GetBTreeBlock:");
107 #endif
108 }
109 }
110 #endif
111 } else {
112 if (bp)
113 brelse(bp);
114 block->blockHeader = NULL;
115 block->buffer = NULL;
116 }
117
118 return (retval);
119 }
120
121
122 __private_extern__
123 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
124 {
125 struct hfsmount *hfsmp = VTOHFS(vp);
126 struct buf *bp = NULL;
127
128 if (hfsmp->jnl == NULL) {
129 return;
130 }
131
132 bp = (struct buf *) blockPtr->blockHeader;
133 if (bp == NULL) {
134 panic("ModifyBlockStart: null bp for blockdescptr 0x%x?!?\n", blockPtr);
135 return;
136 }
137
138 journal_modify_block_start(hfsmp->jnl, bp);
139 blockPtr->isModified = 1;
140 }
141
142
143 __private_extern__
144 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
145 {
146 struct hfsmount *hfsmp = VTOHFS(vp);
147 extern int bdwrite_internal(struct buf *, int);
148 OSStatus retval = E_NONE;
149 struct buf *bp = NULL;
150
151 bp = (struct buf *) blockPtr->blockHeader;
152
153 if (bp == NULL) {
154 retval = -1;
155 goto exit;
156 }
157
158 if (options & kTrashBlock) {
159 bp->b_flags |= B_INVAL;
160 if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) {
161 journal_kill_block(hfsmp->jnl, bp);
162 } else {
163 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
164 }
165 } else {
166 if (options & kForceWriteBlock) {
167 if (hfsmp->jnl) {
168 if (blockPtr->isModified == 0) {
169 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp 0x%x\n", bp);
170 }
171 retval = journal_modify_block_end(hfsmp->jnl, bp);
172 blockPtr->isModified = 0;
173 } else {
174 retval = VOP_BWRITE(bp);
175 }
176 } else if (options & kMarkBlockDirty) {
177 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
178 /*
179 *
180 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
181 * the buffer onto the LOCKED free list. This is necessary, otherwise
182 * getnewbuf() would try to reclaim the buffers using bawrite, which
183 * isn't going to work.
184 *
185 */
186 extern int count_lock_queue __P((void));
187 /* Don't hog all the buffers... */
188 if (count_lock_queue() > kMaxLockedMetaBuffers) {
189 hfs_btsync(vp, HFS_SYNCTRANS);
190 /* Rollback sync time to cause a sync on lock release... */
191 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
192 }
193
194 bp->b_flags |= B_LOCKED;
195 }
196
197 /*
198 * Delay-write this block.
199 * If the maximum delayed buffers has been exceeded then
200 * free up some buffers and fall back to an asynchronous write.
201 */
202 if (hfsmp->jnl) {
203 if (blockPtr->isModified == 0) {
204 panic("hfs: releaseblock: modified is 0 but markdirty set! bp 0x%x\n", bp);
205 }
206 retval = journal_modify_block_end(hfsmp->jnl, bp);
207 blockPtr->isModified = 0;
208 } else if (bdwrite_internal(bp, 1) != 0) {
209 hfs_btsync(vp, 0);
210 /* Rollback sync time to cause a sync on lock release... */
211 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
212 bp->b_flags &= ~B_LOCKED;
213 bawrite(bp);
214 }
215 } else {
216 // check if we had previously called journal_modify_block_start()
217 // on this block and if so, abort it (which will call brelse()).
218 if (hfsmp->jnl && blockPtr->isModified) {
219 // XXXdbg - I don't want to call modify_block_abort()
220 // because I think it may be screwing up the
221 // journal and blowing away a block that has
222 // valid data in it.
223 //
224 // journal_modify_block_abort(hfsmp->jnl, bp);
225 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
226 journal_modify_block_end(hfsmp->jnl, bp);
227 blockPtr->isModified = 0;
228 } else {
229 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
230 }
231 };
232 };
233
234 exit:
235 return (retval);
236 }
237
238
239 __private_extern__
240 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
241 {
242 #pragma unused (maxEOF)
243
244 OSStatus retval, ret;
245 UInt64 actualBytesAdded, origSize;
246 UInt64 bytesToAdd;
247 u_int32_t startAllocation;
248 u_int32_t fileblocks;
249 BTreeInfoRec btInfo;
250 ExtendedVCB *vcb;
251 FCB *filePtr;
252 struct proc *p = NULL;
253 UInt64 trim = 0;
254
255 filePtr = GetFileControlBlock(vp);
256
257 if ( minEOF > filePtr->fcbEOF )
258 {
259 bytesToAdd = minEOF - filePtr->fcbEOF;
260
261 if (bytesToAdd < filePtr->ff_clumpsize)
262 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
263 }
264 else
265 {
266 return -1;
267 }
268
269 vcb = VTOVCB(vp);
270
271 /*
272 * The Extents B-tree can't have overflow extents. ExtendFileC will
273 * return an error if an attempt is made to extend the Extents B-tree
274 * when the resident extents are exhausted.
275 */
276 /* XXX warning - this can leave the volume bitmap unprotected during ExtendFileC call */
277 if(VTOC(vp)->c_fileid != kHFSExtentsFileID)
278 {
279 p = current_proc();
280 /* lock extents b-tree (also protects volume bitmap) */
281 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, p);
282 if (retval)
283 return (retval);
284 }
285
286 (void) BTGetInformation(filePtr, 0, &btInfo);
287
288 #if 0 // XXXdbg
289 /*
290 * The b-tree code expects nodes to be contiguous. So when
291 * the allocation block size is less than the b-tree node
292 * size, we need to force disk allocations to be contiguous.
293 */
294 if (vcb->blockSize >= btInfo.nodeSize) {
295 extendFlags = 0;
296 } else {
297 /* Ensure that all b-tree nodes are contiguous on disk */
298 extendFlags = kEFContigMask;
299 }
300 #endif
301
302 origSize = filePtr->fcbEOF;
303 fileblocks = filePtr->ff_blocks;
304 startAllocation = vcb->nextAllocation;
305
306 // loop trying to get a contiguous chunk that's an integer multiple
307 // of the btree node size. if we can't get a contiguous chunk that
308 // is at least the node size then we break out of the loop and let
309 // the error propagate back up.
310 do {
311 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0, kEFContigMask, &actualBytesAdded);
312 if (retval == dskFulErr && actualBytesAdded == 0) {
313
314 if (bytesToAdd == btInfo.nodeSize || bytesToAdd < (minEOF - origSize)) {
315 // if we're here there's nothing else to try, we're out
316 // of space so we break and bail out.
317 break;
318 } else {
319 bytesToAdd >>= 1;
320 if (bytesToAdd < btInfo.nodeSize) {
321 bytesToAdd = btInfo.nodeSize;
322 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
323 // make sure it's an integer multiple of the nodeSize
324 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
325 }
326 }
327 }
328 } while (retval == dskFulErr && actualBytesAdded == 0);
329
330 /*
331 * If a new extent was added then move the roving allocator
332 * reference forward by the current b-tree file size so
333 * there's plenty of room to grow.
334 */
335 if ((retval == 0) &&
336 (vcb->nextAllocation > startAllocation) &&
337 ((vcb->nextAllocation + fileblocks) < vcb->totalBlocks)) {
338 vcb->nextAllocation += fileblocks;
339 }
340
341 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
342
343 // XXXdbg ExtendFileC() could have returned an error even though
344 // it grew the file to be big enough for our needs. If this is
345 // the case, we don't care about retval so we blow it away.
346 //
347 if (filePtr->fcbEOF >= minEOF && retval != 0) {
348 retval = 0;
349 }
350
351 // XXXdbg if the file grew but isn't large enough or isn't an
352 // even multiple of the nodeSize then trim things back. if
353 // the file isn't large enough we trim back to the original
354 // size. otherwise we trim back to be an even multiple of the
355 // btree node size.
356 //
357 if ((filePtr->fcbEOF < minEOF) || (actualBytesAdded % btInfo.nodeSize) != 0) {
358
359 if (filePtr->fcbEOF < minEOF) {
360 retval = dskFulErr;
361
362 if (filePtr->fcbEOF < origSize) {
363 panic("hfs: btree file eof %lld less than orig size %lld!\n",
364 filePtr->fcbEOF, origSize);
365 }
366
367 trim = filePtr->fcbEOF - origSize;
368 if (trim != actualBytesAdded) {
369 panic("hfs: trim == %lld but actualBytesAdded == %lld\n",
370 trim, actualBytesAdded);
371 }
372 } else {
373 trim = (actualBytesAdded % btInfo.nodeSize);
374 }
375
376 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0);
377 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
378
379 // XXXdbg - panic if the file didn't get trimmed back properly
380 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
381 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb 0x%x\n",
382 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
383 }
384
385 if (ret) {
386 // XXXdbg - this probably doesn't need to be a panic()
387 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n",
388 filePtr->fcbEOF, trim, ret);
389 return ret;
390 }
391 actualBytesAdded -= trim;
392 }
393
394 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
395 /*
396 * Get any extents overflow b-tree changes to disk ASAP!
397 */
398 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
399 (void) VOP_FSYNC(vcb->extentsRefNum, NOCRED, MNT_WAIT, p);
400
401 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
402 }
403
404 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
405 panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
406 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
407 }
408
409 /*
410 * Update the Alternate MDB or Alternate VolumeHeader
411 */
412 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
413 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
414 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
415 ) {
416 MarkVCBDirty( vcb );
417 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
418 }
419
420 ret = ClearBTNodes(vp, btInfo.nodeSize, filePtr->fcbEOF - actualBytesAdded, actualBytesAdded);
421 if (ret)
422 return (ret);
423
424 return retval;
425 }
426
427
428 /*
429 * Clear out (zero) new b-tree nodes on disk.
430 */
431 static int
432 ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
433 {
434 struct hfsmount *hfsmp = VTOHFS(vp);
435 struct buf *bp = NULL;
436 daddr_t blk;
437 daddr_t blkcnt;
438
439 blk = offset / blksize;
440 blkcnt = amount / blksize;
441
442 while (blkcnt > 0) {
443 bp = getblk(vp, blk, blksize, 0, 0, BLK_META);
444 if (bp == NULL)
445 continue;
446
447 // XXXdbg
448 if (hfsmp->jnl) {
449 // XXXdbg -- skipping this for now since it makes a transaction
450 // become *way* too large
451 //journal_modify_block_start(hfsmp->jnl, bp);
452 }
453
454 bzero((char *)bp->b_data, blksize);
455 bp->b_flags |= B_AGE;
456
457 // XXXdbg
458 if (hfsmp->jnl) {
459 // XXXdbg -- skipping this for now since it makes a transaction
460 // become *way* too large
461 //journal_modify_block_end(hfsmp->jnl, bp);
462
463 // XXXdbg - remove this once we decide what to do with the
464 // writes to the journal
465 if ((blk % 32) == 0)
466 VOP_BWRITE(bp);
467 else
468 bawrite(bp);
469 } else {
470 /* wait/yield every 32 blocks so we don't hog all the buffers */
471 if ((blk % 32) == 0)
472 VOP_BWRITE(bp);
473 else
474 bawrite(bp);
475 }
476 --blkcnt;
477 ++blk;
478 }
479
480 return (0);
481 }