]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_btreeio.c
xnu-517.9.4.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_btreeio.c
1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/buf.h>
26 #include <sys/kernel.h>
27 #include <sys/mount.h>
28 #include <sys/vnode.h>
29
30
31 #include "hfs.h"
32 #include "hfs_cnode.h"
33 #include "hfs_dbg.h"
34 #include "hfs_endian.h"
35
36 #include "hfscommon/headers/FileMgrInternal.h"
37 #include "hfscommon/headers/BTreesPrivate.h"
38
39 #define FORCESYNCBTREEWRITES 0
40
41
42 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
43
44
45 __private_extern__
46 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount)
47 {
48 BTreeControlBlockPtr bTreePtr;
49
50 DBG_ASSERT(vp != NULL);
51 DBG_ASSERT(blockSize >= kMinNodeSize);
52 if (blockSize > MAXBSIZE )
53 return (fsBTBadNodeSize);
54
55 bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
56 bTreePtr->nodeSize = blockSize;
57
58 return (E_NONE);
59 }
60
61
62 __private_extern__
63 OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block)
64 {
65 OSStatus retval = E_NONE;
66 struct buf *bp = NULL;
67
68 if (options & kGetEmptyBlock)
69 bp = getblk(vp, blockNum, block->blockSize, 0, 0, BLK_META);
70 else
71 retval = meta_bread(vp, blockNum, block->blockSize, NOCRED, &bp);
72
73 DBG_ASSERT(bp != NULL);
74 DBG_ASSERT(bp->b_data != NULL);
75 DBG_ASSERT(bp->b_bcount == block->blockSize);
76 DBG_ASSERT(bp->b_lblkno == blockNum);
77
78 if (bp == NULL)
79 retval = -1; //XXX need better error
80
81 if (retval == E_NONE) {
82 block->blockHeader = bp;
83 block->buffer = bp->b_data;
84 block->blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; /* not found in cache ==> came from disk */
85
86 // XXXdbg
87 block->isModified = 0;
88
89 #if BYTE_ORDER == LITTLE_ENDIAN
90 /* Endian swap B-Tree node (only if it's a valid block) */
91 if (!(options & kGetEmptyBlock)) {
92 /* This happens when we first open the b-tree, we might not have all the node data on hand */
93 if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
94 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != bp->b_bcount) &&
95 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != bp->b_bcount)) {
96
97 /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */
98 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3);
99
100 /* The node needs swapping */
101 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) {
102 SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 0);
103 #if 0
104 /* The node is not already in native byte order, hence corrupt */
105 } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) != 0x000e) {
106 panic ("%s Corrupt B-Tree node detected!\n", "GetBTreeBlock:");
107 #endif
108 }
109 }
110 #endif
111 } else {
112 if (bp)
113 brelse(bp);
114 block->blockHeader = NULL;
115 block->buffer = NULL;
116 }
117
118 return (retval);
119 }
120
121
122 __private_extern__
123 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
124 {
125 struct hfsmount *hfsmp = VTOHFS(vp);
126 struct buf *bp = NULL;
127
128 if (hfsmp->jnl == NULL) {
129 return;
130 }
131
132 bp = (struct buf *) blockPtr->blockHeader;
133 if (bp == NULL) {
134 panic("ModifyBlockStart: null bp for blockdescptr 0x%x?!?\n", blockPtr);
135 return;
136 }
137
138 journal_modify_block_start(hfsmp->jnl, bp);
139 blockPtr->isModified = 1;
140 }
141
142 static int
143 btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
144 {
145 #if BYTE_ORDER == LITTLE_ENDIAN
146 struct vnode *vp = bp->b_vp;
147 BlockDescriptor block;
148
149 /* Prepare the block pointer */
150 block.blockHeader = bp;
151 block.buffer = bp->b_data;
152 /* not found in cache ==> came from disk */
153 block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
154 block.blockSize = bp->b_bcount;
155
156 // XXXdbg have to swap the data before it goes in the journal
157 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
158 #endif
159
160 return journal_modify_block_end(hfsmp->jnl, bp);
161 }
162
163
164 __private_extern__
165 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
166 {
167 struct hfsmount *hfsmp = VTOHFS(vp);
168 extern int bdwrite_internal(struct buf *, int);
169 OSStatus retval = E_NONE;
170 struct buf *bp = NULL;
171
172 bp = (struct buf *) blockPtr->blockHeader;
173
174 if (bp == NULL) {
175 retval = -1;
176 goto exit;
177 }
178
179 if (options & kTrashBlock) {
180 bp->b_flags |= B_INVAL;
181 if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) {
182 journal_kill_block(hfsmp->jnl, bp);
183 } else {
184 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
185 }
186 } else {
187 if (options & kForceWriteBlock) {
188 if (hfsmp->jnl) {
189 if (blockPtr->isModified == 0) {
190 panic("hfs: releaseblock: modified is 0 but forcewrite set! bp 0x%x\n", bp);
191 }
192
193 retval = btree_journal_modify_block_end(hfsmp, bp);
194 blockPtr->isModified = 0;
195 } else {
196 retval = VOP_BWRITE(bp);
197 }
198 } else if (options & kMarkBlockDirty) {
199 if ((options & kLockTransaction) && hfsmp->jnl == NULL) {
200 /*
201 *
202 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
203 * the buffer onto the LOCKED free list. This is necessary, otherwise
204 * getnewbuf() would try to reclaim the buffers using bawrite, which
205 * isn't going to work.
206 *
207 */
208 extern int count_lock_queue __P((void));
209 /* Don't hog all the buffers... */
210 if (count_lock_queue() > kMaxLockedMetaBuffers) {
211 hfs_btsync(vp, HFS_SYNCTRANS);
212 /* Rollback sync time to cause a sync on lock release... */
213 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
214 }
215
216 bp->b_flags |= B_LOCKED;
217 }
218
219 /*
220 * Delay-write this block.
221 * If the maximum delayed buffers has been exceeded then
222 * free up some buffers and fall back to an asynchronous write.
223 */
224 if (hfsmp->jnl) {
225 if (blockPtr->isModified == 0) {
226 panic("hfs: releaseblock: modified is 0 but markdirty set! bp 0x%x\n", bp);
227 }
228 retval = btree_journal_modify_block_end(hfsmp, bp);
229 blockPtr->isModified = 0;
230 } else if (bdwrite_internal(bp, 1) != 0) {
231 hfs_btsync(vp, 0);
232 /* Rollback sync time to cause a sync on lock release... */
233 (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1));
234 bp->b_flags &= ~B_LOCKED;
235 bawrite(bp);
236 }
237 } else {
238 // check if we had previously called journal_modify_block_start()
239 // on this block and if so, abort it (which will call brelse()).
240 if (hfsmp->jnl && blockPtr->isModified) {
241 // XXXdbg - I don't want to call modify_block_abort()
242 // because I think it may be screwing up the
243 // journal and blowing away a block that has
244 // valid data in it.
245 //
246 // journal_modify_block_abort(hfsmp->jnl, bp);
247 //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp);
248 btree_journal_modify_block_end(hfsmp, bp);
249 blockPtr->isModified = 0;
250 } else {
251 brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */
252 }
253 };
254 };
255
256 exit:
257 return (retval);
258 }
259
260
261 __private_extern__
262 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
263 {
264 #pragma unused (maxEOF)
265
266 OSStatus retval, ret;
267 UInt64 actualBytesAdded, origSize;
268 UInt64 bytesToAdd;
269 u_int32_t startAllocation;
270 u_int32_t fileblocks;
271 BTreeInfoRec btInfo;
272 ExtendedVCB *vcb;
273 FCB *filePtr;
274 struct proc *p = NULL;
275 UInt64 trim = 0;
276
277 filePtr = GetFileControlBlock(vp);
278
279 if ( minEOF > filePtr->fcbEOF )
280 {
281 bytesToAdd = minEOF - filePtr->fcbEOF;
282
283 if (bytesToAdd < filePtr->ff_clumpsize)
284 bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size?
285 }
286 else
287 {
288 return -1;
289 }
290
291 vcb = VTOVCB(vp);
292
293 /*
294 * The Extents B-tree can't have overflow extents. ExtendFileC will
295 * return an error if an attempt is made to extend the Extents B-tree
296 * when the resident extents are exhausted.
297 */
298 /* XXX warning - this can leave the volume bitmap unprotected during ExtendFileC call */
299 if(VTOC(vp)->c_fileid != kHFSExtentsFileID)
300 {
301 p = current_proc();
302 /* lock extents b-tree (also protects volume bitmap) */
303 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, p);
304 if (retval)
305 return (retval);
306 }
307
308 (void) BTGetInformation(filePtr, 0, &btInfo);
309
310 #if 0 // XXXdbg
311 /*
312 * The b-tree code expects nodes to be contiguous. So when
313 * the allocation block size is less than the b-tree node
314 * size, we need to force disk allocations to be contiguous.
315 */
316 if (vcb->blockSize >= btInfo.nodeSize) {
317 extendFlags = 0;
318 } else {
319 /* Ensure that all b-tree nodes are contiguous on disk */
320 extendFlags = kEFContigMask;
321 }
322 #endif
323
324 origSize = filePtr->fcbEOF;
325 fileblocks = filePtr->ff_blocks;
326 startAllocation = vcb->nextAllocation;
327
328 // loop trying to get a contiguous chunk that's an integer multiple
329 // of the btree node size. if we can't get a contiguous chunk that
330 // is at least the node size then we break out of the loop and let
331 // the error propagate back up.
332 do {
333 retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0,
334 kEFContigMask | kEFMetadataMask,
335 &actualBytesAdded);
336 if (retval == dskFulErr && actualBytesAdded == 0) {
337
338 if (bytesToAdd == btInfo.nodeSize || bytesToAdd < (minEOF - origSize)) {
339 // if we're here there's nothing else to try, we're out
340 // of space so we break and bail out.
341 break;
342 } else {
343 bytesToAdd >>= 1;
344 if (bytesToAdd < btInfo.nodeSize) {
345 bytesToAdd = btInfo.nodeSize;
346 } else if ((bytesToAdd % btInfo.nodeSize) != 0) {
347 // make sure it's an integer multiple of the nodeSize
348 bytesToAdd -= (bytesToAdd % btInfo.nodeSize);
349 }
350 }
351 }
352 } while (retval == dskFulErr && actualBytesAdded == 0);
353
354 /*
355 * If a new extent was added then move the roving allocator
356 * reference forward by the current b-tree file size so
357 * there's plenty of room to grow.
358 */
359 if ((retval == 0) &&
360 ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) &&
361 (vcb->nextAllocation > startAllocation) &&
362 ((vcb->nextAllocation + fileblocks) < vcb->totalBlocks)) {
363 vcb->nextAllocation += fileblocks;
364 }
365
366 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
367
368 // XXXdbg ExtendFileC() could have returned an error even though
369 // it grew the file to be big enough for our needs. If this is
370 // the case, we don't care about retval so we blow it away.
371 //
372 if (filePtr->fcbEOF >= minEOF && retval != 0) {
373 retval = 0;
374 }
375
376 // XXXdbg if the file grew but isn't large enough or isn't an
377 // even multiple of the nodeSize then trim things back. if
378 // the file isn't large enough we trim back to the original
379 // size. otherwise we trim back to be an even multiple of the
380 // btree node size.
381 //
382 if ((filePtr->fcbEOF < minEOF) || (actualBytesAdded % btInfo.nodeSize) != 0) {
383
384 if (filePtr->fcbEOF < minEOF) {
385 retval = dskFulErr;
386
387 if (filePtr->fcbEOF < origSize) {
388 panic("hfs: btree file eof %lld less than orig size %lld!\n",
389 filePtr->fcbEOF, origSize);
390 }
391
392 trim = filePtr->fcbEOF - origSize;
393 if (trim != actualBytesAdded) {
394 panic("hfs: trim == %lld but actualBytesAdded == %lld\n",
395 trim, actualBytesAdded);
396 }
397 } else {
398 trim = (actualBytesAdded % btInfo.nodeSize);
399 }
400
401 ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0);
402 filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
403
404 // XXXdbg - panic if the file didn't get trimmed back properly
405 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
406 panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb 0x%x\n",
407 filePtr->fcbEOF, btInfo.nodeSize, filePtr);
408 }
409
410 if (ret) {
411 // XXXdbg - this probably doesn't need to be a panic()
412 panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n",
413 filePtr->fcbEOF, trim, ret);
414 return ret;
415 }
416 actualBytesAdded -= trim;
417 }
418
419 if(VTOC(vp)->c_fileid != kHFSExtentsFileID) {
420 /*
421 * Get any extents overflow b-tree changes to disk ASAP!
422 */
423 (void) BTFlushPath(VTOF(vcb->extentsRefNum));
424 (void) VOP_FSYNC(vcb->extentsRefNum, NOCRED, MNT_WAIT, p);
425
426 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
427 }
428
429 if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) {
430 panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n",
431 filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim);
432 }
433
434 /*
435 * Update the Alternate MDB or Alternate VolumeHeader
436 */
437 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
438 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
439 (VTOC(vp)->c_fileid == kHFSAttributesFileID)
440 ) {
441 MarkVCBDirty( vcb );
442 ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH);
443 } else {
444 struct timeval tv = time;
445
446 VTOC(vp)->c_flag |= C_CHANGE | C_UPDATE;
447 (void) VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
448 }
449
450 ret = ClearBTNodes(vp, btInfo.nodeSize, filePtr->fcbEOF - actualBytesAdded, actualBytesAdded);
451 if (ret)
452 return (ret);
453
454 return retval;
455 }
456
457
458 /*
459 * Clear out (zero) new b-tree nodes on disk.
460 */
461 static int
462 ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount)
463 {
464 struct hfsmount *hfsmp = VTOHFS(vp);
465 struct buf *bp = NULL;
466 daddr_t blk;
467 daddr_t blkcnt;
468
469 blk = offset / blksize;
470 blkcnt = amount / blksize;
471
472 while (blkcnt > 0) {
473 bp = getblk(vp, blk, blksize, 0, 0, BLK_META);
474 if (bp == NULL)
475 continue;
476
477 // XXXdbg
478 if (hfsmp->jnl) {
479 // XXXdbg -- skipping this for now since it makes a transaction
480 // become *way* too large
481 //journal_modify_block_start(hfsmp->jnl, bp);
482 }
483
484 bzero((char *)bp->b_data, blksize);
485 bp->b_flags |= B_AGE;
486
487 // XXXdbg
488 if (hfsmp->jnl) {
489 // XXXdbg -- skipping this for now since it makes a transaction
490 // become *way* too large
491 //journal_modify_block_end(hfsmp->jnl, bp);
492
493 // XXXdbg - remove this once we decide what to do with the
494 // writes to the journal
495 if ((blk % 32) == 0)
496 VOP_BWRITE(bp);
497 else
498 bawrite(bp);
499 } else {
500 /* wait/yield every 32 blocks so we don't hog all the buffers */
501 if ((blk % 32) == 0)
502 VOP_BWRITE(bp);
503 else
504 bawrite(bp);
505 }
506 --blkcnt;
507 ++blk;
508 }
509
510 return (0);
511 }