/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_vfsutils.c 4.0
 *
 * (c) 1997-2002 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/mount_internal.h>
#include <sys/buf.h>
#include <sys/buf_internal.h>
#include <sys/ubc.h>
#include <sys/unistd.h>
#include <sys/utfconv.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/vnode_internal.h>
#include <kern/clock.h>
#include <stdbool.h>

#include <libkern/OSAtomic.h>

/* for parsing boot-args */
#include <pexpert/pexpert.h>

#if CONFIG_PROTECT
#include <sys/cprotect.h>
#endif

#include "hfs.h"
#include "hfs_catalog.h"
#include "hfs_dbg.h"
#include "hfs_mount.h"
#include "hfs_endian.h"
#include "hfs_cnode.h"
#include "hfs_fsctl.h"

#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfscommon/headers/HFSUnicodeWrappers.h"

/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

static void ReleaseMetaFileVNode(struct vnode *vp);
static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

#define HFS_MOUNT_DEBUG 1

//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
// hence is not in the right byte order on little-endian machines. It is
// the responsibility of the Finder and other clients to swap the data.
//*******************************************************************************

//*******************************************************************************
// Routine: hfs_MountHFSVolume
//
//
//*******************************************************************************
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";

#if CONFIG_HFS_STD
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        __unused struct proc *p)
{
    ExtendedVCB *vcb = HFSTOVCB(hfsmp);
    int error;
    ByteCount utf8chars;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork fork;
    int newvnode_flags = 0;

    /* Block size must be a multiple of 512 */
    if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
        (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
        return (EINVAL);

    /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
    if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
        ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
        return (EINVAL);
    }
    hfsmp->hfs_flags |= HFS_STANDARD;
    /*
     * The MDB seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     *
     */
    vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
    vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
    vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
    vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
    vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
    vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
    vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
    vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
    vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
    vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
    vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
    vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
    vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
    vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
    vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
    vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
    vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
    vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
    bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* Compensate for write of MDB on last flush */

    /* convert hfs encoded name into UTF-8 string */
    error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
    /*
     * When an HFS name cannot be encoded with the current
     * volume encoding we use MacRoman as a fallback.
     */
    if (error || (utf8chars == 0)) {
        error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
        /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny the mount */
        if (error) {
            goto MtVolErr;
        }
    }

    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = kHFSBlockSize;

    /* Generate the partition-based AVH location */
    hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                                                     hfsmp->hfs_logical_block_count);

    /* HFS standard is read-only, so just stuff the FS location in here, too */
    hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
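
    /*
     * A hedged worked example (numbers are illustrative, not from any real
     * volume): per the AVH placement convention noted below, the alternate
     * volume header lives 1024 bytes before the end of the partition.  With
     * 512-byte logical blocks and a partition of 2,000,000 logical blocks,
     * HFS_ALT_SECTOR() resolves to sector 1,999,998 -- the sector that
     * starts 1024 bytes before the end of the device.
     */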

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    bzero(&fork, sizeof(fork));

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
    fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
        }
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                     (KeyCompareProcPtr)CompareExtentKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    /*
     * Set up Catalog B-tree vnode...
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
    fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                     (KeyCompareProcPtr)CompareCatalogKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    /*
     * Set up dummy Allocation file vnode (used only for locking bitmap)
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
    bzero(&fork, sizeof(fork));
    cnattr.ca_blocks = 0;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;

    if (error == noErr) {
        error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
        if (error && HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
        }
    }

    if (error == noErr) {
        /* If the disk isn't write protected.. */
        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
            MarkVCBDirty (vcb); // mark VCB dirty so it will be written
        }
    }

    /*
     * all done with system files so we can unlock now...
     */
    hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
    hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
    hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

    if (error == noErr) {
        /* If successful, then we can just return once we've unlocked the cnodes */
        return error;
    }

    //-- Release any resources allocated so far before exiting with an error:
MtVolErr:
    hfsUnmount(hfsmp, NULL);

    return (error);
}

#endif

//*******************************************************************************
//
// Sanity check Volume Header Block:
//      Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has
//      not been endian-swapped and represents the on-disk contents of this sector.
//      This routine will not change the endianness of the vhp block.
//
//*******************************************************************************
OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp)
{
    u_int16_t signature;
    u_int16_t hfs_version;
    u_int32_t blockSize;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    if (signature == kHFSPlusSigWord) {
        if (hfs_version != kHFSPlusVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else if (signature == kHFSXSigWord) {
        if (hfs_version != kHFSXVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else {
        /* Removed printf for invalid HFS+ signature because it gives
         * a false error for a UFS root volume
         */
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature);
        }
        return (EINVAL);
    }

    /* Block size must be at least 512 and a power of 2 */
    blockSize = SWAP_BE32(vhp->blockSize);
    if (blockSize < 512 || !powerof2(blockSize)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize);
        }
        return (EINVAL);
    }

    if (blockSize < hfsmp->hfs_logical_block_size) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
                   blockSize, hfsmp->hfs_logical_block_size);
        }
        return (EINVAL);
    }
    return 0;
}
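
/*
 * Illustrative (hypothetical) header values that pass the checks above: an
 * HFS+ volume carries signature kHFSPlusSigWord ('H+') with version
 * kHFSPlusVersion, an HFSX volume carries kHFSXSigWord ('HX') with
 * kHFSXVersion, and a blockSize such as 4096 satisfies both the >= 512 and
 * power-of-2 requirements.
 */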

//*******************************************************************************
// Routine: hfs_MountHFSPlusVolume
//
//
//*******************************************************************************

OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
    register ExtendedVCB *vcb;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    u_int32_t blockSize;
    daddr64_t spare_sectors;
    struct BTreeInfoRec btinfo;
    u_int16_t signature;
    u_int16_t hfs_version;
    int newvnode_flags = 0;
    int i;
    OSErr retval;
    char converted_volname[256];
    size_t volname_length = 0;
    size_t conv_volname_length = 0;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
    if (retval)
        return retval;

    if (signature == kHFSXSigWord) {
        /* The in-memory signature is always 'H+'. */
        signature = kHFSPlusSigWord;
        hfsmp->hfs_flags |= HFS_X;
    }

    blockSize = SWAP_BE32(vhp->blockSize);
    /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
        (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
        }
        return (EINVAL);
    }

    /* Make sure we can live with the physical block size. */
    if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
        (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n",
                   hfsmp->hfs_logical_block_size);
        }
        return (ENXIO);
    }

    /*
     * If the allocation block size is less than the physical block size,
     * the same data could be cached in two places, which leads to corruption.
     *
     * HFS Plus reserves one allocation block for the Volume Header.
     * If the physical size is larger, then when we read the volume header,
     * we will also end up reading in the next allocation block(s).
     * If those other allocation block(s) is/are modified, and then the volume
     * header is modified, the write of the volume header's buffer will write
     * out the old contents of the other allocation blocks.
     *
     * We assume that the physical block size is the same as the logical block
     * size.  The physical block size value is used to round down the offsets
     * for reading and writing the primary and alternate volume headers.
     *
     * The same logic to ensure a good hfs_physical_block_size is also in
     * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and
     * later are doing the I/Os using the same block size.
     */
    if (blockSize < hfsmp->hfs_physical_block_size) {
        hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
        hfsmp->hfs_log_per_phys = 1;
    }
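
    /*
     * A hedged example of the fallback above (the values are hypothetical):
     * on a disk reporting 4096-byte physical sectors and 512-byte logical
     * sectors, a volume formatted with 2048-byte allocation blocks trips
     * the check (2048 < 4096), so hfs_physical_block_size drops back to
     * the 512-byte logical size and volume-header I/O happens at that
     * granularity, avoiding the double-caching hazard described above.
     */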

    /*
     * The VolumeHeader seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb = HFSTOVCB(hfsmp);

    vcb->vcbSigWord = signature;
    vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
    vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
    vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
    vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
    vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
    vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);

    /* copy 32 bytes of Finder info */
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

    vcb->vcbAlBlSt = 0;     /* hfs+ allocation blocks start at first block of volume */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* compensate for write of Volume Header on last flush */

    /* Now fill in the Extended VCB info */
    vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
    vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
    vcb->blockSize = blockSize;
    vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
    vcb->localCreateDate = SWAP_BE32(vhp->createDate);

    vcb->hfsPlusIOPosOffset = embeddedOffset;

    /* Default to no free block reserve */
    vcb->reserveBlocks = 0;

    /*
     * Update the logical block size in the mount struct
     * (currently set up from the wrapper MDB) using the
     * new blocksize value:
     */
    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

    /*
     * Validate and initialize the location of the alternate volume header.
     *
     * Note that there may be spare sectors beyond the end of the filesystem that still
     * belong to our partition.
     */

    spare_sectors = hfsmp->hfs_logical_block_count -
                    (((daddr64_t)vcb->totalBlocks * blockSize) /
                     hfsmp->hfs_logical_block_size);

    /*
     * Differentiate between "innocuous" spare sectors and the more unusual
     * degenerate case:
     *
     * *** Innocuous spare sectors exist if:
     *
     * A) the number of bytes assigned to the partition (the logical block
     * size multiplied by the logical block count) is greater than the
     * filesystem size (the allocation block count multiplied by the
     * allocation block size)
     *
     * and
     *
     * B) the remainder is less than the size of a full allocation block's worth of bytes.
     *
     * This handles the normal case where there may be a few extra sectors, but the two
     * are fundamentally in sync.
     *
     * *** Degenerate spare sectors exist if:
     * A) the number of bytes assigned to the partition (the logical block
     * size multiplied by the logical block count) is greater than the
     * filesystem size (the allocation block count multiplied by the block size)
     *
     * and
     *
     * B) the remainder is greater than a full allocation block's worth of bytes.
     * In this case, a smaller file system exists in a larger partition.
     * This can happen in various ways, including when the volume is resized but the
     * partition has yet to be resized. Under this condition, we have to assume that
     * partition management software may resize the partition to match
     * the file system size in the future. Therefore we should update the
     * alternate volume header at two locations on the disk:
     *   a. 1024 bytes before the end of the partition
     *   b. 1024 bytes before the end of the file system
     */

    if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
        /*
         * Handle the degenerate case above. FS < partition size.
         * AVH located at 1024 bytes from the end of the partition
         */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        /* AVH located at 1024 bytes from the end of the filesystem */
        hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                               (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
    }
    else {
        /* Innocuous spare sectors; Partition & FS notion are in sync */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
    }
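
    /*
     * A hedged worked example of the degenerate case (all numbers are
     * hypothetical): a partition of 1,000,000 512-byte logical sectors
     * (512,000,000 bytes) holding a filesystem of only 100,000 4096-byte
     * allocation blocks (409,600,000 bytes) leaves
     * spare_sectors = 1,000,000 - 800,000 = 200,000, far more than one
     * allocation block's worth (4096 / 512 = 8 sectors), so both AVH
     * locations are tracked until the partition is shrunk to match.
     */
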
    if (hfs_resize_debug) {
        printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
                hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
    }

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

    cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
    cfork.cf_new_size = 0;
    cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
    hfs_unlock(hfsmp->hfs_extents_cp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                      (KeyCompareProcPtr) CompareExtentKeysPlus));
    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    /*
     * Set up Catalog B-tree vnode
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

    cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
    cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
    hfs_unlock(hfsmp->hfs_catalog_cp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                      (KeyCompareProcPtr) CompareExtendedCatalogKeys));
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    if ((hfsmp->hfs_flags & HFS_X) &&
        BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
        if (btinfo.keyCompareType == kHFSBinaryCompare) {
            hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
            /* Install a case-sensitive key compare */
            (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                              (KeyCompareProcPtr)cat_binarykeycompare);
        }
    }

    /*
     * Set up Allocation file vnode
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

    cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
    cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
    hfs_unlock(hfsmp->hfs_allocation_cp);

    /*
     * Set up Attribute B-tree vnode
     */
    if (vhp->attributesFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_attrname;
        cndesc.cd_namelen = strlen((char *)hfs_attrname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

        cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
        cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize);
        cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_attribute_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
        hfs_unlock(hfsmp->hfs_attribute_cp);
        retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                          (KeyCompareProcPtr) hfs_attrkeycompare));
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }

        /* Initialize a vnode for the virtual attribute data file that spans
         * the entire file system space, used for performing I/O to the
         * attribute btree.  We hold an iocount on the attrdata vnode for the
         * entire duration of the mount (similar to the btree vnodes).
         */
        retval = init_attrdata_vnode(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
            }
            goto ErrorExit;
        }
    }

    /*
     * Set up Startup file vnode
     */
    if (vhp->startupFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_startupname;
        cndesc.cd_namelen = strlen((char *)hfs_startupname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

        cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize);
        cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize);
        cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_startup_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
        hfs_unlock(hfsmp->hfs_startup_cp);
    }

    /*
     * Pick up volume name and create date
     *
     * Acquiring the volume name should not manipulate the bitmap, only the catalog
     * btree and possibly the extents overflow b-tree.
     */
    retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
        }
        goto ErrorExit;
    }
    vcb->hfs_itime = cnattr.ca_itime;
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    volname_length = strlen ((const char*)vcb->vcbVN);
    cat_releasedesc(&cndesc);

#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])


    /* Send the volume name down to CoreStorage if necessary */
    retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
    if (retval == 0) {
        (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
    }

    /* reset retval to 0; we don't care about errors in volname conversion */
    retval = 0;


    /*
     * We now always initiate a full bitmap scan even if the volume is read-only because this is
     * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
     * expects. TRIMs will not be delivered to the underlying media if the volume is not
     * read-write though.
     */
    thread_t allocator_scanner;
    hfsmp->scan_var = 0;

    /* Take the HFS mount mutex and wait on scan_var */
    hfs_lock_mount (hfsmp);

    kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
    /* Wait until it registers that it's got the appropriate locks */
    while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
        (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
        if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
            break;
        }
        else {
            hfs_lock_mount (hfsmp);
        }
    }

    thread_deallocate (allocator_scanner);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
    if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
        hfs_flushvolumeheader(hfsmp, TRUE, 0);
    }

    /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
    if ((hfsmp->hfs_flags & HFS_X) != 0) {
        hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
    }

    //
    // Check if we need to do late journal initialization.  This only
    // happens if a previous version of MacOS X (or 9) touched the disk.
    // In that case hfs_late_journal_init() will go re-locate the journal
    // and journal_info_block files and validate that they're still kosher.
    //
    if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
        && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
        && (hfsmp->jnl == NULL)) {

        retval = hfs_late_journal_init(hfsmp, vhp, args);
        if (retval != 0) {
            if (retval == EROFS) {
                // EROFS is a special error code that means the volume has an external
                // journal which we couldn't find.  In that case we do not want to
                // rewrite the volume header - we'll just refuse to mount the volume.
                if (HFS_MOUNT_DEBUG) {
                    printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
                }
                retval = EINVAL;
                goto ErrorExit;
            }

            hfsmp->jnl = NULL;

            // if the journal failed to open, then set the lastMountedVersion
            // to be "FSK!" which fsck_hfs will see and force the fsck instead
            // of just bailing out because the volume is journaled.
            if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
                HFSPlusVolumeHeader *jvhp;
                daddr64_t mdb_offset;
                struct buf *bp = NULL;

                hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

                bp = NULL;
                retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
                        HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                        hfsmp->hfs_physical_block_size, cred, &bp);
                if (retval == 0) {
                    jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

                    if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                        printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
                        jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
                        buf_bwrite(bp);
                    } else {
                        buf_brelse(bp);
                    }
                    bp = NULL;
                } else if (bp) {
                    buf_brelse(bp);
                    // clear this so the error exit path won't try to use it
                    bp = NULL;
                }
            }

            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
            }
            retval = EINVAL;
            goto ErrorExit;
        } else if (hfsmp->jnl) {
            vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
        struct cat_attr jinfo_attr, jnl_attr;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
        }

        // if we're here we need to fill in the fileid's for the
        // journal and journal_info_block.
        hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
        hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
        if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
            printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
            printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb |= kHFSVolumeJournaledMask;
        }

        if (hfsmp->jnl == NULL) {
            vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    }

    if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
    {
        MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
    }

    /*
     * Distinguish 3 potential cases involving content protection:
     * 1. mount point bit set; vcbAtrb does not support it. Fail.
     * 2. mount point bit set; vcbAtrb supports it. We're good.
     * 3. mount point bit not set; vcbAtrb supports it; turn the bit on, then good.
     */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Does the mount point support it ? */
        if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
            /* Case 1 above */
            retval = EINVAL;
            goto ErrorExit;
        }
    }
    else {
        /* not requested in the mount point. Is it in FS? */
        if (vcb->vcbAtrb & kHFSContentProtectionMask) {
            /* Case 3 above */
            vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
        }
    }

    /* At this point, if the mount point flag is set, we can enable it. */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Cases 2+3 above */
#if CONFIG_PROTECT
        /* Get the EAs as needed. */
        int cperr = 0;
        uint16_t majorversion;
        uint16_t minorversion;
        uint64_t flags;
        uint8_t cryptogen = 0;
        struct cp_root_xattr *xattr = NULL;
        MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
        if (xattr == NULL) {
            retval = ENOMEM;
            goto ErrorExit;
        }
        bzero (xattr, sizeof(struct cp_root_xattr));

        /* go get the EA to get the version information */
        cperr = cp_getrootxattr (hfsmp, xattr);
        /*
         * If there was no EA there, then write one out.
         * The EA being absent on the root is assumed to mean
         * this is an erase-install or a very old FS.
         */

        if (cperr == 0) {
            /* Have to run a valid CP version. */
            if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
                cperr = EINVAL;
            }
        }
        else if (cperr == ENOATTR) {
            printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
            bzero(xattr, sizeof(struct cp_root_xattr));
            xattr->major_version = CP_NEW_MAJOR_VERS;
            xattr->minor_version = CP_MINOR_VERS;
            cperr = cp_setrootxattr (hfsmp, xattr);
        }
        majorversion = xattr->major_version;
        minorversion = xattr->minor_version;
        flags = xattr->flags;
        if (xattr->flags & CP_ROOT_CRYPTOG1) {
            cryptogen = 1;
        }

        if (xattr) {
            FREE(xattr, M_TEMP);
        }

        /* Recheck for good status */
        if (cperr == 0) {
            /* If we got here, then the CP version is valid. Set it in the mount point */
            hfsmp->hfs_running_cp_major_vers = majorversion;
            printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
            hfsmp->cproot_flags = flags;
            hfsmp->cp_crypto_generation = cryptogen;

            /*
             * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
             * Ensure that the boot-arg's value is valid for FILES (not directories),
             * since only files are actually protected for now.
             */

            PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));

            if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
                PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
            }

            if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
                hfsmp->default_cp_class = PROTECTION_CLASS_C;
            }
        }
        else {
            retval = EPERM;
            goto ErrorExit;
        }
#else
        /* If CONFIG_PROTECT not built, ignore CP */
        vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
#endif
    }

    /*
     * Establish a metadata allocation zone.
     */
    hfs_metadatazone_init(hfsmp, false);

    /*
     * Make any metadata zone adjustments.
     */
    if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
        /* Keep the roving allocator out of the metadata zone. */
        if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
            vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
        }
    } else {
        if (vcb->nextAllocation <= 1) {
            vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
        }
    }
    vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

    /* Setup private/hidden directories for hardlinks. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        hfs_remove_orphans(hfsmp);

    /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
    {
        retval = hfs_erase_unused_nodes(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
            }

            goto ErrorExit;
        }
    }

    /*
     * Allow hot file clustering if conditions allow.
     */
    if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
        ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
        (void) hfs_recording_init(hfsmp);
    }

    /* Force ACLs on HFS+ file systems. */
    vfs_setextendedsecurity(HFSTOVFS(hfsmp));

    /* Enable extent-based extended attributes by default */
    hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;

    return (0);

ErrorExit:
    /*
     * A fatal error occurred and the volume cannot be mounted, so
     * release any resources that we acquired...
     */
    hfsUnmount(hfsmp, NULL);

    if (HFS_MOUNT_DEBUG) {
        printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
    }
    return (retval);
}


/*
 * ReleaseMetaFileVNode
 *
 * vp   L - -
 */
static void ReleaseMetaFileVNode(struct vnode *vp)
{
    struct filefork *fp;

    if (vp && (fp = VTOF(vp))) {
        if (fp->fcbBTCBPtr != NULL) {
            (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            (void) BTClosePath(fp);
            hfs_unlock(VTOC(vp));
        }

        /* release the node even if BTClosePath fails */
        vnode_recycle(vp);
        vnode_put(vp);
    }
}


/*************************************************************
 *
 * Unmounts a hfs volume.
 * At this point vflush() has been called (to dump all non-metadata files)
 *
 *************************************************************/

int
hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
{
    /* Get rid of our attribute data vnode (if any).  This is done
     * after the vflush() during mount, so we don't need to worry
     * about any locks.
     */
    if (hfsmp->hfs_attrdata_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
        hfsmp->hfs_attrdata_vp = NULLVP;
    }

    if (hfsmp->hfs_startup_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
        hfsmp->hfs_startup_cp = NULL;
        hfsmp->hfs_startup_vp = NULL;
    }

    if (hfsmp->hfs_attribute_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
        hfsmp->hfs_attribute_cp = NULL;
        hfsmp->hfs_attribute_vp = NULL;
    }

    if (hfsmp->hfs_catalog_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
        hfsmp->hfs_catalog_cp = NULL;
        hfsmp->hfs_catalog_vp = NULL;
    }

    if (hfsmp->hfs_extents_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
        hfsmp->hfs_extents_cp = NULL;
        hfsmp->hfs_extents_vp = NULL;
    }

    if (hfsmp->hfs_allocation_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
        hfsmp->hfs_allocation_cp = NULL;
        hfsmp->hfs_allocation_vp = NULL;
    }

    return (0);
}


/*
 * Test if fork has overflow extents.
 *
 * Returns:
 *  non-zero - overflow extents exist
 *  zero     - overflow extents do not exist
 */
__private_extern__
bool overflow_extents(struct filefork *fp)
{
    u_int32_t blocks;

    //
    // If the vnode pointer is NULL then we're being called
    // from hfs_remove_orphans() with a faked-up filefork
    // and therefore it has to be an HFS+ volume.  Otherwise
    // we check through the volume header to see what type
    // of volume we're on.
    //

#if CONFIG_HFS_STD
    if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
        if (fp->ff_extents[2].blockCount == 0)
            return false;

        blocks = fp->ff_extents[0].blockCount +
                 fp->ff_extents[1].blockCount +
                 fp->ff_extents[2].blockCount;

        return fp->ff_blocks > blocks;
    }
#endif

    if (fp->ff_extents[7].blockCount == 0)
        return false;

    blocks = fp->ff_extents[0].blockCount +
             fp->ff_extents[1].blockCount +
             fp->ff_extents[2].blockCount +
             fp->ff_extents[3].blockCount +
             fp->ff_extents[4].blockCount +
             fp->ff_extents[5].blockCount +
             fp->ff_extents[6].blockCount +
             fp->ff_extents[7].blockCount;

    return fp->ff_blocks > blocks;
}
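
/*
 * A hedged worked example (the values are hypothetical): on HFS+, a fork
 * with ff_blocks == 100 whose eight inline extent slots are all in use but
 * only account for 80 blocks makes overflow_extents() return true, since
 * the remaining 20 blocks must be described by records in the extents
 * overflow B-tree.  If slot 7 is empty, every block fits inline and the
 * function returns false.
 */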

static __attribute__((pure))
boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
{
    return (hfsmp->hfs_freeze_state == HFS_FROZEN
            || (hfsmp->hfs_freeze_state == HFS_FREEZING
                && current_thread() != hfsmp->hfs_freezing_thread));
}

/*
 * Lock the HFS global journal lock
 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
    thread_t thread = current_thread();

    if (hfsmp->hfs_global_lockowner == thread) {
        panic ("hfs_lock_global: locking against myself!");
    }

    /*
     * This check isn't really necessary but this stops us taking
     * the mount lock in most cases.  The essential check is below.
     */
    if (hfs_is_frozen(hfsmp)) {
        /*
         * Unfortunately, there is no easy way of getting a notification
         * for when a process is exiting and it's possible for the exiting
         * process to get blocked somewhere else.  To catch this, we
         * periodically monitor the frozen process here and thaw if
         * we spot that it's exiting.
         */
frozen:
        hfs_lock_mount(hfsmp);

        struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

        while (hfs_is_frozen(hfsmp)) {
            if (hfsmp->hfs_freeze_state == HFS_FROZEN
                && proc_exiting(hfsmp->hfs_freezing_proc)) {
                hfs_thaw_locked(hfsmp);
                break;
            }

            msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
                   PWAIT, "hfs_lock_global (frozen)", &ts);
        }
        hfs_unlock_mount(hfsmp);
    }

    /* HFS_SHARED_LOCK */
    if (locktype == HFS_SHARED_LOCK) {
        lck_rw_lock_shared (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
    }
    /* HFS_EXCLUSIVE_LOCK */
    else {
        lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = thread;
    }

    /*
     * We have to check if we're frozen again because of the time
     * between when we checked and when we took the global lock.
     */
    if (hfs_is_frozen(hfsmp)) {
        hfs_unlock_global(hfsmp);
        goto frozen;
    }

    return 0;
}


/*
 * Unlock the HFS global journal lock
 */
void
hfs_unlock_global (struct hfsmount *hfsmp)
{
    thread_t thread = current_thread();

    /* HFS_LOCK_EXCLUSIVE */
    if (hfsmp->hfs_global_lockowner == thread) {
        hfsmp->hfs_global_lockowner = NULL;
        lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
    }
    /* HFS_LOCK_SHARED */
    else {
        lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
    }
}

/*
 * Lock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_lock_mount (struct hfsmount *hfsmp) {
    lck_mtx_lock (&(hfsmp->hfs_mutex));
}

/*
 * Unlock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_unlock_mount (struct hfsmount *hfsmp) {
    lck_mtx_unlock (&(hfsmp->hfs_mutex));
}

/*
 * Lock HFS system file(s).
 */
int
hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
{
    /*
     * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
     */
    if (flags & SFL_CATALOG) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
        }
        if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
        }
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_catalog_cp) {
            (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the catalog file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_catalog_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_CATALOG;
        }
    }

    if (flags & SFL_ATTRIBUTE) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
        }
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_attribute_cp) {
            (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the attribute file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_attribute_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_ATTRIBUTE;
        }
    }

    if (flags & SFL_STARTUP) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_startup_cp) {
            (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the startup file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_startup_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_STARTUP;
        }
    }

    /*
     * To prevent locks being taken in the wrong order, the extent lock
     * gets a bitmap lock as well.
     */
    if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
        if (hfsmp->hfs_allocation_cp) {
            (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            /*
             * The bitmap lock is also grabbed when only extent lock
             * was requested. Set the bitmap lock bit in the lock
             * flags which callers will use during unlock.
             */
            flags |= SFL_BITMAP;
        } else {
            flags &= ~SFL_BITMAP;
        }
    }

    if (flags & SFL_EXTENTS) {
        /*
         * Since the extents btree lock is recursive we always
         * need exclusive access.
         */
        if (hfsmp->hfs_extents_cp) {
            (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

            if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) {
                /*
                 * because we may need this lock on the pageout path (if a swapfile allocation
                 * spills into the extents overflow tree), we will grant the holder of this
                 * lock the privilege of dipping into the reserve free pool in order to prevent
                 * a deadlock from occurring if we need those pageouts to complete before we
                 * will make any new pages available on the free list... the deadlock can occur
                 * if this thread needs to allocate memory while this lock is held
                 */
                if (set_vm_privilege(TRUE) == FALSE) {
                    /*
                     * indicate that we need to drop vm_privilege
                     * when we unlock
                     */
                    flags |= SFL_VM_PRIV;
                }
            }
        } else {
            flags &= ~SFL_EXTENTS;
        }
    }

    return (flags);
}

/*
 * unlock HFS system file(s).
 */
void
hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
{
    struct timeval tv;
    u_int32_t lastfsync;
    int numOfLockedBuffs;

    if (hfsmp->jnl == NULL) {
        microuptime(&tv);
        lastfsync = tv.tv_sec;
    }
    if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
        hfs_unlock(hfsmp->hfs_startup_cp);
    }
    if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_attribute_cp);
    }
    if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_catalog_cp);
    }
    if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
        hfs_unlock(hfsmp->hfs_allocation_cp);
    }
    if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_extents_cp);

        if (flags & SFL_VM_PRIV) {
            /*
             * revoke the vm_privilege we granted this thread
             * now that we have unlocked the overflow extents
             */
            set_vm_privilege(FALSE);
        }
    }
}
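
/*
 * Typical lock/unlock pattern (this is how GetFileInfo() below uses these
 * routines; shown here as a usage sketch): always pass the flags *returned*
 * by hfs_systemfile_lock() to hfs_systemfile_unlock(), since the lock call
 * may add SFL_EXTENTS and/or SFL_BITMAP to what was requested.
 *
 *	int lockflags;
 *
 *	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 *	error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
 *	hfs_systemfile_unlock(hfsmp, lockflags);
 */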


/*
 * RequireFileLock
 *
 * Check to see if a vnode is locked in the current context
 * This is to be used for debugging purposes only!!
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
    int locked;

    /* The extents btree and allocation bitmap are always exclusive. */
    if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
        VTOC(vp)->c_fileid == kHFSAllocationFileID) {
        shareable = 0;
    }

    locked = VTOC(vp)->c_lockowner == current_thread();

    if (!locked && !shareable) {
        switch (VTOC(vp)->c_fileid) {
        case kHFSExtentsFileID:
            panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSCatalogFileID:
            panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSAllocationFileID:
            /* The allocation file can hide behind the journal lock. */
1601 if (VTOHFS(vp)->jnl == NULL)
1602 panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1603 break;
1604 case kHFSStartupFileID:
1605 panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1606 case kHFSAttributesFileID:
1607 panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1608 break;
1609 }
1610 }
1611 }
1612 #endif
1613
1614
1615 /*
1616 * There are three ways to qualify for ownership rights on an object:
1617 *
1618 * 1. (a) Your UID matches the cnode's UID.
1619 * (b) The object in question is owned by "unknown"
1620 * 2. (a) Permissions on the filesystem are being ignored and
1621 * your UID matches the replacement UID.
1622 * (b) Permissions on the filesystem are being ignored and
1623 * the replacement UID is "unknown".
1624 * 3. You are root.
1625 *
1626 */
1627 int
1628 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1629 __unused struct proc *p, int invokesuperuserstatus)
1630 {
1631 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1632 (cnode_uid == UNKNOWNUID) || /* [1b] */
1633 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1634 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1635 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1636 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1637 return (0);
1638 } else {
1639 return (EPERM);
1640 }
1641 }
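
/*
 * Example (illustrative sketch; example_can_modify is hypothetical):
 * a typical caller checks ownership before permitting a change,
 * passing invokesuperuserstatus so that root always qualifies.
 */
#if 0
static int
example_can_modify(struct hfsmount *hfsmp, struct cnode *cp, kauth_cred_t cred)
{
	/* Returns 0 when the caller owns cp (or is root), else EPERM. */
	return hfs_owner_rights(hfsmp, cp->c_attr.ca_uid, cred, current_proc(), 1);
}
#endif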
1642
1643
1644 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1645 u_int32_t blockSizeLimit,
1646 u_int32_t baseMultiple) {
1647 /*
1648 Compute the optimal (largest) block size: an even divisor of allocationBlockSize that is
1649 no greater than the specified limit and still an even multiple of the baseMultiple.
1650 */
1651 int baseBlockCount, blockCount;
1652 u_int32_t trialBlockSize;
1653
1654 if (allocationBlockSize % baseMultiple != 0) {
1655 /*
1656 Whoops: the allocation blocks aren't even multiples of the specified base;
1657 no amount of dividing them into smaller pieces will produce one, either.
1658 */
1659 return 512; /* Hope for the best */
1660 };
1661
1662 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1663 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1664 Even though the former (the result of the loop below) is the larger allocation
1665 block size, the latter is more efficient: */
1666 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1667
1668 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1669 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1670
1671 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1672 trialBlockSize = blockCount * baseMultiple;
1673 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1674 if ((trialBlockSize <= blockSizeLimit) &&
1675 (trialBlockSize % baseMultiple == 0)) {
1676 return trialBlockSize;
1677 };
1678 };
1679 };
1680
1681 /* Note: we should never get here, since blockCount = 1 should always work,
1682 but this is nice and safe and makes the compiler happy, too ... */
1683 return 512;
1684 }
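
/*
 * Worked example: with allocationBlockSize = 12288 (12K), baseMultiple = 512,
 * and a 4K PAGE_SIZE, the early check fires (12288 % 4096 == 0) and we return
 * 4096 -- three 4K logical blocks rather than two 6K ones. With
 * allocationBlockSize = 6144 the loop runs instead and returns the largest
 * divisor of 6144 that is a multiple of 512 and no greater than blockSizeLimit.
 */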
1685
1686
1687 u_int32_t
1688 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1689 struct cat_attr *fattr, struct cat_fork *forkinfo)
1690 {
1691 struct hfsmount * hfsmp;
1692 struct cat_desc jdesc;
1693 int lockflags;
1694 int error;
1695
1696 if (vcb->vcbSigWord != kHFSPlusSigWord)
1697 return (0);
1698
1699 hfsmp = VCBTOHFS(vcb);
1700
1701 memset(&jdesc, 0, sizeof(struct cat_desc));
1702 jdesc.cd_parentcnid = kRootDirID;
1703 jdesc.cd_nameptr = (const u_int8_t *)name;
1704 jdesc.cd_namelen = strlen(name);
1705
1706 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1707 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1708 hfs_systemfile_unlock(hfsmp, lockflags);
1709
1710 if (error == 0) {
1711 return (fattr->ca_fileid);
1712 }
1713 
1714 /* Lookup failed (read-only or otherwise); callers treat 0 as "not found". */
1715 return (0);
1716 
1717 }
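
/*
 * Example (sketch): hfs_late_journal_init() below uses GetFileInfo() to
 * locate the journal files by name; a return of 0 means "not found".
 */
#if 0
	struct cat_attr jattr;
	struct cat_fork jfork;
	u_int32_t fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
	if (fid == 0) {
		/* no such file in the root directory */
	}
#endif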
1718
1719
1720 /*
1721 * On HFS Plus volumes, there can be orphaned files or directories:
1722 * these are files or directories that were unlinked while still busy.
1723 * If the volume was not cleanly unmounted then some of these may
1724 * have persisted and need to be removed.
1725 */
1726 void
1727 hfs_remove_orphans(struct hfsmount * hfsmp)
1728 {
1729 struct BTreeIterator * iterator = NULL;
1730 struct FSBufferDescriptor btdata;
1731 struct HFSPlusCatalogFile filerec;
1732 struct HFSPlusCatalogKey * keyp;
1733 struct proc *p = current_proc();
1734 FCB *fcb;
1735 ExtendedVCB *vcb;
1736 char filename[32];
1737 char tempname[32];
1738 size_t namelen;
1739 cat_cookie_t cookie;
1740 int catlock = 0;
1741 int catreserve = 0;
1742 int started_tr = 0;
1743 int lockflags;
1744 int result;
1745 int orphaned_files = 0;
1746 int orphaned_dirs = 0;
1747
1748 bzero(&cookie, sizeof(cookie));
1749
1750 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1751 return;
1752
1753 vcb = HFSTOVCB(hfsmp);
1754 fcb = VTOF(hfsmp->hfs_catalog_vp);
1755
1756 btdata.bufferAddress = &filerec;
1757 btdata.itemSize = sizeof(filerec);
1758 btdata.itemCount = 1;
1759
1760 MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1761 bzero(iterator, sizeof(*iterator));
1762
1763 /* Build a key to "temp" */
1764 keyp = (HFSPlusCatalogKey*)&iterator->key;
1765 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1766 keyp->nodeName.length = 4; /* "temp" */
1767 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1768 keyp->nodeName.unicode[0] = 't';
1769 keyp->nodeName.unicode[1] = 'e';
1770 keyp->nodeName.unicode[2] = 'm';
1771 keyp->nodeName.unicode[3] = 'p';
1772
1773 /*
1774 * Position the iterator just before the first real temp file/dir.
1775 */
1776 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1777 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1778 hfs_systemfile_unlock(hfsmp, lockflags);
1779
1780 /* Visit all the temp files/dirs in the HFS+ private directory. */
1781 for (;;) {
1782 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1783 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1784 hfs_systemfile_unlock(hfsmp, lockflags);
1785 if (result)
1786 break;
1787 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1788 break;
1789
1790 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1791 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1792
1793 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1794 HFS_DELETE_PREFIX, filerec.fileID);
1795
1796 /*
1797 * Delete all files (and directories) named "tempxxx",
1798 * where xxx is the file's cnid in decimal.
1799 *
1800 */
1801 if (bcmp(tempname, filename, namelen) == 0) {
1802 struct filefork dfork;
1803 struct filefork rfork;
1804 struct cnode cnode;
1805 int mode = 0;
1806
1807 bzero(&dfork, sizeof(dfork));
1808 bzero(&rfork, sizeof(rfork));
1809 bzero(&cnode, sizeof(cnode));
1810
1811 /* Delete any attributes, ignore errors */
1812 (void) hfs_removeallattr(hfsmp, filerec.fileID);
1813
1814 if (hfs_start_transaction(hfsmp) != 0) {
1815 printf("hfs_remove_orphans: failed to start transaction\n");
1816 goto exit;
1817 }
1818 started_tr = 1;
1819
1820 /*
1821 * Reserve some space in the Catalog file.
1822 */
1823 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1824 printf("hfs_remove_orphans: cat_preflight failed\n");
1825 goto exit;
1826 }
1827 catreserve = 1;
1828
1829 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1830 catlock = 1;
1831
1832 /* Build a fake cnode */
1833 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1834 &dfork.ff_data, &rfork.ff_data);
1835 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1836 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1837 cnode.c_desc.cd_namelen = namelen;
1838 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1839 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1840
1841 /* Position iterator at previous entry */
1842 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1843 NULL, NULL) != 0) {
1844 break;
1845 }
1846
1847 /* Truncate the file to zero (both forks) */
1848 if (dfork.ff_blocks > 0) {
1849 u_int64_t fsize;
1850
1851 dfork.ff_cp = &cnode;
1852 cnode.c_datafork = &dfork;
1853 cnode.c_rsrcfork = NULL;
1854 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1855 while (fsize > 0) {
1856 if (fsize > HFS_BIGFILE_SIZE) {
1857 fsize -= HFS_BIGFILE_SIZE;
1858 } else {
1859 fsize = 0;
1860 }
1861
1862 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1863 cnode.c_attr.ca_fileid, false) != 0) {
1864 printf("hfs: error truncating data fork!\n");
1865 break;
1866 }
1867
1868 //
1869 // if we're iteratively truncating this file down,
1870 // then end the transaction and start a new one so
1871 // that no one transaction gets too big.
1872 //
1873 if (fsize > 0 && started_tr) {
1874 /* Drop system file locks before starting
1875 * another transaction to preserve lock order.
1876 */
1877 hfs_systemfile_unlock(hfsmp, lockflags);
1878 catlock = 0;
1879 hfs_end_transaction(hfsmp);
1880
1881 if (hfs_start_transaction(hfsmp) != 0) {
1882 started_tr = 0;
1883 break;
1884 }
1885 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1886 catlock = 1;
1887 }
1888 }
1889 }
1890
1891 if (rfork.ff_blocks > 0) {
1892 rfork.ff_cp = &cnode;
1893 cnode.c_datafork = NULL;
1894 cnode.c_rsrcfork = &rfork;
1895 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1896 printf("hfs: error truncating rsrc fork!\n");
1897 break;
1898 }
1899 }
1900
1901 /* Remove the file or folder record from the Catalog */
1902 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1903 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1904 hfs_systemfile_unlock(hfsmp, lockflags);
1905 catlock = 0;
1906 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1907 break;
1908 }
1909
1910 mode = cnode.c_attr.ca_mode & S_IFMT;
1911
1912 if (mode == S_IFDIR) {
1913 orphaned_dirs++;
1914 }
1915 else {
1916 orphaned_files++;
1917 }
1918
1919 /* Update parent and volume counts */
1920 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1921 if (mode == S_IFDIR) {
1922 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1923 }
1924
1925 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1926 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1927
1928 /* Drop locks and end the transaction */
1929 hfs_systemfile_unlock(hfsmp, lockflags);
1930 cat_postflight(hfsmp, &cookie, p);
1931 catlock = catreserve = 0;
1932
1933 /*
1934 Now that Catalog is unlocked, update the volume info, making
1935 sure to differentiate between files and directories
1936 */
1937 if (mode == S_IFDIR) {
1938 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1939 }
1940 else {
1941 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1942 }
1943
1944 if (started_tr) {
1945 hfs_end_transaction(hfsmp);
1946 started_tr = 0;
1947 }
1948
1949 } /* end if */
1950 } /* end for */
1951 if (orphaned_files > 0 || orphaned_dirs > 0)
1952 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1953 exit:
1954 if (catlock) {
1955 hfs_systemfile_unlock(hfsmp, lockflags);
1956 }
1957 if (catreserve) {
1958 cat_postflight(hfsmp, &cookie, p);
1959 }
1960 if (started_tr) {
1961 hfs_end_transaction(hfsmp);
1962 }
1963
1964 FREE(iterator, M_TEMP);
1965 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1966 }
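
/*
 * Example: an open-unlinked file with cnid 1234 was renamed to
 * HFS_DELETE_PREFIX "1234" (i.e. "temp1234") in the private directory;
 * the loop above rebuilds that name with snprintf() and removes any
 * matching entry left behind by an unclean unmount.
 */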
1967
1968
1969 /*
1970 * This will return the correct logical block size for a given vnode.
1971 * For most files this is the allocation block size; for metadata like
1972 * the B-trees, it is the B-tree's private nodeSize.
1973 */
1974 u_int32_t
1975 GetLogicalBlockSize(struct vnode *vp)
1976 {
1977 u_int32_t logBlockSize;
1978
1979 DBG_ASSERT(vp != NULL);
1980
1981 /* start with default */
1982 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1983
1984 if (vnode_issystem(vp)) {
1985 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1986 BTreeInfoRec bTreeInfo;
1987
1988 /*
1989 * We do not lock the B-tree here: if we are fetching a block, the tree
1990 * should already be locked by the caller.
1991 * We only want the nodeSize, which never changes, so even if the world
1992 * is changing, the nodeSize remains the same -- which is also why locking
1993 * here would buy us nothing.
1994 */
1995
1996 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1997
1998 logBlockSize = bTreeInfo.nodeSize;
1999
2000 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
2001 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
2002 }
2003 }
2004
2005 DBG_ASSERT(logBlockSize > 0);
2006
2007 return logBlockSize;
2008 }
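
/*
 * Example: for a B-tree system file with (say) 8K nodes, this returns 8192
 * (the node size); for an ordinary user file it returns hfs_logBlockSize.
 */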
2009
2010 #if HFS_SPARSE_DEV
2011 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
2012 {
2013 struct vfsstatfs *vfsp; /* 272 bytes */
2014 uint64_t vfreeblks;
2015 struct timeval now;
2016
2017 hfs_lock_mount(hfsmp);
2018
2019 vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp;
2020 if (!backing_vp) {
2021 hfs_unlock_mount(hfsmp);
2022 return false;
2023 }
2024
2025 // usecount is not enough; we need iocount
2026 if (vnode_get(backing_vp)) {
2027 hfs_unlock_mount(hfsmp);
2028 *pfree_blks = 0;
2029 return true;
2030 }
2031
2032 uint32_t loanedblks = hfsmp->loanedBlocks;
2033 uint32_t bandblks = hfsmp->hfs_sparsebandblks;
2034 uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks;
2035
2036 hfs_unlock_mount(hfsmp);
2037
2038 mount_t backingfs_mp = vnode_mount(backing_vp);
2039
2040 microtime(&now);
2041 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
2042 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
2043 hfsmp->hfs_last_backingstatfs = now.tv_sec;
2044 }
2045
2046 if (!(vfsp = vfs_statfs(backingfs_mp))) {
2047 vnode_put(backing_vp);
2048 return false;
2049 }
2050
2051 vfreeblks = vfsp->f_bavail;
2052 /* Normalize block count if needed. */
2053 if (vfsp->f_bsize != hfsmp->blockSize)
2054 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
2055 if (vfreeblks > bandblks)
2056 vfreeblks -= bandblks;
2057 else
2058 vfreeblks = 0;
2059
2060 /*
2061 * Take into account any delayed allocations. It is not
2062 * certain what the original reason for the "2 *" is. Most
2063 * likely it is to allow for additional requirements in the
2064 * host file system and metadata required by disk images. The
2065 * number of loaned blocks is likely to be small and we will
2066 * stop using them as we get close to the limit.
2067 */
2068 loanedblks = 2 * loanedblks;
2069 if (vfreeblks > loanedblks)
2070 vfreeblks -= loanedblks;
2071 else
2072 vfreeblks = 0;
2073
2074 if (maxblks)
2075 vfreeblks = MIN(vfreeblks, maxblks);
2076
2077 vnode_put(backing_vp);
2078
2079 *pfree_blks = vfreeblks;
2080
2081 return true;
2082 }
2083 #endif
2084
2085 u_int32_t
2086 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2087 {
2088 u_int32_t freeblks;
2089 u_int32_t rsrvblks;
2090 u_int32_t loanblks;
2091
2092 /*
2093 * We don't bother taking the mount lock
2094 * to look at these values since the values
2095 * themselves are each updated atomically
2096 * on aligned addresses.
2097 */
2098 freeblks = hfsmp->freeBlocks;
2099 rsrvblks = hfsmp->reserveBlocks;
2100 loanblks = hfsmp->loanedBlocks;
2101 if (wantreserve) {
2102 if (freeblks > rsrvblks)
2103 freeblks -= rsrvblks;
2104 else
2105 freeblks = 0;
2106 }
2107 if (freeblks > loanblks)
2108 freeblks -= loanblks;
2109 else
2110 freeblks = 0;
2111
2112 #if HFS_SPARSE_DEV
2113 /*
2114 * When the underlying device is sparse, check the
2115 * available space on the backing store volume.
2116 */
2117 uint64_t vfreeblks;
2118 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2119 freeblks = MIN(freeblks, vfreeblks);
2120 #endif /* HFS_SPARSE_DEV */
2121
2122 if (hfsmp->hfs_flags & HFS_CS) {
2123 uint64_t cs_free_bytes;
2124 uint64_t cs_free_blks;
2125 if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
2126 (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
2127 cs_free_blks = cs_free_bytes / hfsmp->blockSize;
2128 if (cs_free_blks > loanblks)
2129 cs_free_blks -= loanblks;
2130 else
2131 cs_free_blks = 0;
2132 freeblks = MIN(cs_free_blks, freeblks);
2133 }
2134 }
2135
2136 return (freeblks);
2137 }
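
/*
 * Example (sketch): statfs-style callers convert the result to bytes;
 * wantreserve selects whether the on-disk reserve is subtracted.
 */
#if 0
	uint64_t avail_bytes =
	    (uint64_t)hfs_freeblks(hfsmp, 1 /* honor reserve */) * hfsmp->blockSize;
#endif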
2138
2139 /*
2140 * Map HFS Common errors (negative) to BSD error codes (positive).
2141 * Positive errors (i.e., BSD errors) are passed through unchanged.
2142 */
2143 short MacToVFSError(OSErr err)
2144 {
2145 if (err >= 0)
2146 return err;
2147
2148 /* BSD/VFS internal errnos */
2149 switch (err) {
2150 case ERESERVEDNAME: /* -8 */
2151 return err;
2152 }
2153
2154 switch (err) {
2155 case dskFulErr: /* -34 */
2156 case btNoSpaceAvail: /* -32733 */
2157 return ENOSPC;
2158 case fxOvFlErr: /* -32750 */
2159 return EOVERFLOW;
2160
2161 case btBadNode: /* -32731 */
2162 return EIO;
2163
2164 case memFullErr: /* -108 */
2165 return ENOMEM; /* +12 */
2166
2167 case cmExists: /* -32718 */
2168 case btExists: /* -32734 */
2169 return EEXIST; /* +17 */
2170
2171 case cmNotFound: /* -32719 */
2172 case btNotFound: /* -32735 */
2173 return ENOENT; /* +2 */
2174
2175 case cmNotEmpty: /* -32717 */
2176 return ENOTEMPTY; /* 66 */
2177
2178 case cmFThdDirErr: /* -32714 */
2179 return EISDIR; /* 21 */
2180
2181 case fxRangeErr: /* -32751 */
2182 return ERANGE;
2183
2184 case bdNamErr: /* -37 */
2185 return ENAMETOOLONG; /* 63 */
2186
2187 case paramErr: /* -50 */
2188 case fileBoundsErr: /* -1309 */
2189 return EINVAL; /* +22 */
2190
2191 case fsBTBadNodeSize:
2192 return ENXIO;
2193
2194 default:
2195 return EIO; /* +5 */
2196 }
2197 }
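
/*
 * Example (sketch): B-tree and catalog routines return Mac OS style OSErr
 * values; VFS-facing code translates them on the way out:
 */
#if 0
	int result = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (result)
		return MacToVFSError(result);
#endif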
2198
2199
2200 /*
2201 * Find the current thread's directory hint for a given index.
2202 *
2203 * Requires an exclusive lock on directory cnode.
2204 *
2205 * Use detach if the cnode lock must be dropped while the hint is still active.
2206 */
2207 __private_extern__
2208 directoryhint_t *
2209 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2210 {
2211 struct timeval tv;
2212 directoryhint_t *hint;
2213 boolean_t need_remove, need_init;
2214 const u_int8_t * name;
2215
2216 microuptime(&tv);
2217
2218 /*
2219 * Look for an existing hint first. If not found, create a new one (when
2220 * the list is not full) or recycle the oldest hint. Since new hints are
2221 * always added to the head of the list, the last hint is always the
2222 * oldest.
2223 */
2224 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2225 if (hint->dh_index == index)
2226 break;
2227 }
2228 if (hint != NULL) { /* found an existing hint */
2229 need_init = false;
2230 need_remove = true;
2231 } else { /* cannot find an existing hint */
2232 need_init = true;
2233 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2234 /* Create a default directory hint */
2235 MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
2236 ++dcp->c_dirhintcnt;
2237 need_remove = false;
2238 } else { /* recycle the last (i.e., the oldest) hint */
2239 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2240 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2241 (name = hint->dh_desc.cd_nameptr)) {
2242 hint->dh_desc.cd_nameptr = NULL;
2243 hint->dh_desc.cd_namelen = 0;
2244 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2245 vfs_removename((const char *)name);
2246 }
2247 need_remove = true;
2248 }
2249 }
2250
2251 if (need_remove)
2252 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2253
2254 if (detach)
2255 --dcp->c_dirhintcnt;
2256 else
2257 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2258
2259 if (need_init) {
2260 hint->dh_index = index;
2261 hint->dh_desc.cd_flags = 0;
2262 hint->dh_desc.cd_encoding = 0;
2263 hint->dh_desc.cd_namelen = 0;
2264 hint->dh_desc.cd_nameptr = NULL;
2265 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2266 hint->dh_desc.cd_hint = dcp->c_childhint;
2267 hint->dh_desc.cd_cnid = 0;
2268 }
2269 hint->dh_time = tv.tv_sec;
2270 return (hint);
2271 }
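
/*
 * Example (sketch): a readdir-style caller fetches the hint for its resume
 * index, continues the catalog scan from hint->dh_desc, and releases the
 * hint when it is no longer needed:
 */
#if 0
	directoryhint_t *hint = hfs_getdirhint(dcp, index, 0 /* keep on list */);
	/* ... continue the directory scan using hint->dh_desc ... */
	hfs_reldirhint(dcp, hint);
#endif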
2272
2273 /*
2274 * Release a single directory hint.
2275 *
2276 * Requires an exclusive lock on directory cnode.
2277 */
2278 __private_extern__
2279 void
2280 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2281 {
2282 const u_int8_t * name;
2283 directoryhint_t *hint;
2284
2285 /* Check if item is on list (could be detached) */
2286 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2287 if (hint == relhint) {
2288 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2289 --dcp->c_dirhintcnt;
2290 break;
2291 }
2292 }
2293 name = relhint->dh_desc.cd_nameptr;
2294 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2295 relhint->dh_desc.cd_nameptr = NULL;
2296 relhint->dh_desc.cd_namelen = 0;
2297 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2298 vfs_removename((const char *)name);
2299 }
2300 FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2301 }
2302
2303 /*
2304 * Release directory hints for given directory
2305 *
2306 * Requires an exclusive lock on directory cnode.
2307 */
2308 __private_extern__
2309 void
2310 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2311 {
2312 struct timeval tv;
2313 directoryhint_t *hint, *prev;
2314 const u_int8_t * name;
2315
2316 if (stale_hints_only)
2317 microuptime(&tv);
2318
2319 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2320 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2321 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2322 break; /* stop here if this entry is too new */
2323 name = hint->dh_desc.cd_nameptr;
2324 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2325 hint->dh_desc.cd_nameptr = NULL;
2326 hint->dh_desc.cd_namelen = 0;
2327 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2328 vfs_removename((const char *)name);
2329 }
2330 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2331 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2332 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2333 --dcp->c_dirhintcnt;
2334 }
2335 }
2336
2337 /*
2338 * Insert a detached directory hint back into the list of dirhints.
2339 *
2340 * Requires an exclusive lock on directory cnode.
2341 */
2342 __private_extern__
2343 void
2344 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2345 {
2346 directoryhint_t *test;
2347
2348 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2349 if (test == hint)
2350 panic("hfs_insertdirhint: hint %p already on list!", hint);
2351 }
2352
2353 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2354 ++dcp->c_dirhintcnt;
2355 }
2356
2357 /*
2358 * Perform a case-insensitive compare of two UTF-8 filenames.
2359 *
2360 * Returns 0 if the strings match.
2361 */
2362 __private_extern__
2363 int
2364 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2365 {
2366 u_int16_t *ustr1, *ustr2;
2367 size_t ulen1, ulen2;
2368 size_t maxbytes;
2369 int cmp = -1;
2370
2371 if (len1 != len2)
2372 return (cmp);
2373
2374 maxbytes = kHFSPlusMaxFileNameChars << 1;
2375 MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2376 ustr2 = ustr1 + (maxbytes >> 1);
2377
2378 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2379 goto out;
2380 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2381 goto out;
2382
2383 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2384 out:
2385 FREE(ustr1, M_TEMP);
2386 return (cmp);
2387 }
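
/*
 * Example: hfs_namecmp((const u_int8_t *)"README", 6,
 * (const u_int8_t *)"readme", 6) returns 0 because HFS+ name comparison
 * is case-insensitive; unequal byte lengths short-circuit to nonzero.
 */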
2388
2389
2390 typedef struct jopen_cb_info {
2391 off_t jsize;
2392 char *desired_uuid;
2393 struct vnode *jvp;
2394 size_t blksize;
2395 int need_clean;
2396 int need_init;
2397 } jopen_cb_info;
2398
2399 static int
2400 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2401 {
2402 struct nameidata nd;
2403 jopen_cb_info *ji = (jopen_cb_info *)arg;
2404 char bsd_name[256];
2405 int error;
2406
2407 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2408 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2409
2410 if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2411 return 1; // keep iterating
2412 }
2413
2414 // if we're here, either the desired uuid matched or there was no
2415 // desired uuid so let's try to open the device for writing and
2416 // see if it works. if it does, we'll use it.
2417
2418 NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2419 if ((error = namei(&nd))) {
2420 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2421 return 1; // keep iterating
2422 }
2423
2424 ji->jvp = nd.ni_vp;
2425 nameidone(&nd);
2426
2427 if (ji->jvp == NULL) {
2428 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2429 } else {
2430 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2431 if (error == 0) {
2432 // if the journal is dirty and we didn't specify a desired
2433 // journal device uuid, then do not use the journal. but
2434 // if the journal is just invalid (e.g. it hasn't been
2435 // initialized) then just set the need_init flag.
2436 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2437 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2438 if (error == EBUSY) {
2439 VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2440 vnode_put(ji->jvp);
2441 ji->jvp = NULL;
2442 return 1; // keep iterating
2443 } else if (error == EINVAL) {
2444 ji->need_init = 1;
2445 }
2446 }
2447
2448 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2449 strlcpy(ji->desired_uuid, uuid_str, 128);
2450 }
2451 vnode_setmountedon(ji->jvp);
2452 return 0; // stop iterating
2453 } else {
2454 vnode_put(ji->jvp);
2455 ji->jvp = NULL;
2456 }
2457 }
2458
2459 return 1; // keep iterating
2460 }
2461
2462 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2463 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2464
2465
2466 static vnode_t
2467 open_journal_dev(const char *vol_device,
2468 int need_clean,
2469 char *uuid_str,
2470 char *machine_serial_num,
2471 off_t jsize,
2472 size_t blksize,
2473 int *need_init)
2474 {
2475 int retry_counter=0;
2476 jopen_cb_info ji;
2477
2478 ji.jsize = jsize;
2479 ji.desired_uuid = uuid_str;
2480 ji.jvp = NULL;
2481 ji.blksize = blksize;
2482 ji.need_clean = need_clean;
2483 ji.need_init = 0;
2484
2485 // if (uuid_str[0] == '\0') {
2486 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2487 // } else {
2488 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2489 // }
2490 while (ji.jvp == NULL && retry_counter++ < 4) {
2491 if (retry_counter > 1) {
2492 if (uuid_str[0]) {
2493 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2494 } else {
2495 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2496 }
2497 delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
2498 }
2499
2500 IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2501 }
2502
2503 if (ji.jvp == NULL) {
2504 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2505 vol_device, uuid_str, machine_serial_num);
2506 }
2507
2508 *need_init = ji.need_init;
2509
2510 return ji.jvp;
2511 }
2512
2513
2514 int
2515 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2516 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2517 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2518 {
2519 JournalInfoBlock *jibp;
2520 struct buf *jinfo_bp, *bp;
2521 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2522 int retval, write_jibp = 0;
2523 uint32_t blksize = hfsmp->hfs_logical_block_size;
2524 struct vnode *devvp;
2525 struct hfs_mount_args *args = _args;
2526 u_int32_t jib_flags;
2527 u_int64_t jib_offset;
2528 u_int64_t jib_size;
2529 const char *dev_name;
2530
2531 devvp = hfsmp->hfs_devvp;
2532 dev_name = vnode_getname_printable(devvp);
2533
2534 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2535 arg_flags = args->journal_flags;
2536 arg_tbufsz = args->journal_tbuffer_size;
2537 }
2538
2539 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2540
2541 jinfo_bp = NULL;
2542 retval = (int)buf_meta_bread(devvp,
2543 (daddr64_t)((embeddedOffset/blksize) +
2544 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2545 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2546 if (retval) {
2547 if (jinfo_bp) {
2548 buf_brelse(jinfo_bp);
2549 }
2550 goto cleanup_dev_name;
2551 }
2552
2553 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2554 jib_flags = SWAP_BE32(jibp->flags);
2555 jib_size = SWAP_BE64(jibp->size);
2556
2557 if (jib_flags & kJIJournalInFSMask) {
2558 hfsmp->jvp = hfsmp->hfs_devvp;
2559 jib_offset = SWAP_BE64(jibp->offset);
2560 } else {
2561 int need_init=0;
2562
2563 // if the volume was unmounted cleanly then we'll pick any
2564 // available external journal partition
2565 //
2566 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2567 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2568 }
2569
2570 hfsmp->jvp = open_journal_dev(dev_name,
2571 !(jib_flags & kJIJournalNeedInitMask),
2572 (char *)&jibp->ext_jnl_uuid[0],
2573 (char *)&jibp->machine_serial_num[0],
2574 jib_size,
2575 hfsmp->hfs_logical_block_size,
2576 &need_init);
2577 if (hfsmp->jvp == NULL) {
2578 buf_brelse(jinfo_bp);
2579 retval = EROFS;
2580 goto cleanup_dev_name;
2581 } else {
2582 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2583 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2584 }
2585 }
2586
2587 jib_offset = 0;
2588 write_jibp = 1;
2589 if (need_init) {
2590 jib_flags |= kJIJournalNeedInitMask;
2591 }
2592 }
2593
2594 // save this off for the hack-y check in hfs_remove()
2595 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2596 hfsmp->jnl_size = jib_size;
2597
2598 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2599 // if the file system is read-only, check if the journal is empty.
2600 // if it is, then we can allow the mount. otherwise we have to
2601 // return failure.
2602 retval = journal_is_clean(hfsmp->jvp,
2603 jib_offset + embeddedOffset,
2604 jib_size,
2605 devvp,
2606 hfsmp->hfs_logical_block_size);
2607
2608 hfsmp->jnl = NULL;
2609
2610 buf_brelse(jinfo_bp);
2611
2612 if (retval) {
2613 const char *name = vnode_getname_printable(devvp);
2614 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2615 name);
2616 vnode_putname_printable(name);
2617 }
2618
2619 goto cleanup_dev_name;
2620 }
2621
2622 if (jib_flags & kJIJournalNeedInitMask) {
2623 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2624 jib_offset + embeddedOffset, jib_size);
2625 hfsmp->jnl = journal_create(hfsmp->jvp,
2626 jib_offset + embeddedOffset,
2627 jib_size,
2628 devvp,
2629 blksize,
2630 arg_flags,
2631 arg_tbufsz,
2632 hfs_sync_metadata, hfsmp->hfs_mp,
2633 hfsmp->hfs_mp);
2634 if (hfsmp->jnl)
2635 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2636
2637 // no need to start a transaction here... if this were to fail
2638 // we'd just re-init it on the next mount.
2639 jib_flags &= ~kJIJournalNeedInitMask;
2640 jibp->flags = SWAP_BE32(jib_flags);
2641 buf_bwrite(jinfo_bp);
2642 jinfo_bp = NULL;
2643 jibp = NULL;
2644 } else {
2645 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2646 // jib_offset + embeddedOffset,
2647 // jib_size, SWAP_BE32(vhp->blockSize));
2648
2649 hfsmp->jnl = journal_open(hfsmp->jvp,
2650 jib_offset + embeddedOffset,
2651 jib_size,
2652 devvp,
2653 blksize,
2654 arg_flags,
2655 arg_tbufsz,
2656 hfs_sync_metadata, hfsmp->hfs_mp,
2657 hfsmp->hfs_mp);
2658 if (hfsmp->jnl)
2659 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2660
2661 if (write_jibp) {
2662 buf_bwrite(jinfo_bp);
2663 } else {
2664 buf_brelse(jinfo_bp);
2665 }
2666 jinfo_bp = NULL;
2667 jibp = NULL;
2668
2669 if (hfsmp->jnl && mdbp) {
2670 // reload the mdb because it could have changed
2671 // if the journal had to be replayed.
2672 if (mdb_offset == 0) {
2673 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2674 }
2675 bp = NULL;
2676 retval = (int)buf_meta_bread(devvp,
2677 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2678 hfsmp->hfs_physical_block_size, cred, &bp);
2679 if (retval) {
2680 if (bp) {
2681 buf_brelse(bp);
2682 }
2683 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2684 retval);
2685 goto cleanup_dev_name;
2686 }
2687 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2688 buf_brelse(bp);
2689 bp = NULL;
2690 }
2691 }
2692
2693 // if we expected the journal to be there and we couldn't
2694 // create it or open it then we have to bail out.
2695 if (hfsmp->jnl == NULL) {
2696 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2697 retval = EINVAL;
2698 goto cleanup_dev_name;
2699 }
2700
2701 retval = 0;
2702
2703 cleanup_dev_name:
2704 vnode_putname_printable(dev_name);
2705 return retval;
2706 }
2707
2708
2709 //
2710 // This function will go and re-locate the .journal_info_block and
2711 // the .journal files in case they moved (which can happen if you
2712 // run Norton SpeedDisk). If we fail to find either file we just
2713 // disable journaling for this volume and return. We turn off the
2714 // journaling bit in the vcb and assume it will get written to disk
2715 // later (if it doesn't, on the next mount we'd do the same thing
2716 // again, which is harmless). If we disable journaling we don't
2717 // return an error so that the volume is still mountable.
2718 //
2719 // If the info we find for the .journal_info_block and .journal files
2720 // isn't what we had stored, we re-set our cached info and proceed
2721 // with opening the journal normally.
2722 //
2723 static int
2724 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2725 {
2726 JournalInfoBlock *jibp;
2727 struct buf *jinfo_bp;
2728 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2729 int retval, write_jibp = 0, recreate_journal = 0;
2730 struct vnode *devvp;
2731 struct cat_attr jib_attr, jattr;
2732 struct cat_fork jib_fork, jfork;
2733 ExtendedVCB *vcb;
2734 u_int32_t fid;
2735 struct hfs_mount_args *args = _args;
2736 u_int32_t jib_flags;
2737 u_int64_t jib_offset;
2738 u_int64_t jib_size;
2739
2740 devvp = hfsmp->hfs_devvp;
2741 vcb = HFSTOVCB(hfsmp);
2742
2743 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2744 if (args->journal_disable) {
2745 return 0;
2746 }
2747
2748 arg_flags = args->journal_flags;
2749 arg_tbufsz = args->journal_tbuffer_size;
2750 }
2751
2752 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2753 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2754 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2755 jib_fork.cf_extents[0].startBlock);
2756 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2757 return 0;
2758 }
2759 hfsmp->hfs_jnlinfoblkid = fid;
2760
2761 // make sure the journal_info_block begins where we think it should.
2762 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2763 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2764 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2765
2766 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2767 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2768 recreate_journal = 1;
2769 }
2770
2771
2772 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2773 jinfo_bp = NULL;
2774 retval = (int)buf_meta_bread(devvp,
2775 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2776 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2777 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2778 if (retval) {
2779 if (jinfo_bp) {
2780 buf_brelse(jinfo_bp);
2781 }
2782 printf("hfs: can't read journal info block. disabling journaling.\n");
2783 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2784 return 0;
2785 }
2786
2787 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2788 jib_flags = SWAP_BE32(jibp->flags);
2789 jib_offset = SWAP_BE64(jibp->offset);
2790 jib_size = SWAP_BE64(jibp->size);
2791
2792 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2793 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2794 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2795 jfork.cf_extents[0].startBlock);
2796 buf_brelse(jinfo_bp);
2797 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2798 return 0;
2799 }
2800 hfsmp->hfs_jnlfileid = fid;
2801
2802 // make sure the journal file begins where we think it should.
2803 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2804 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2805 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2806
2807 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2808 write_jibp = 1;
2809 recreate_journal = 1;
2810 }
2811
2812 // check the size of the journal file.
2813 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2814 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2815 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2816
2817 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2818 write_jibp = 1;
2819 recreate_journal = 1;
2820 }
2821
2822 if (jib_flags & kJIJournalInFSMask) {
2823 hfsmp->jvp = hfsmp->hfs_devvp;
2824 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2825 } else {
2826 const char *dev_name;
2827 int need_init = 0;
2828
2829 dev_name = vnode_getname_printable(devvp);
2830
2831 // since the journal is empty, just use any available external journal
2832 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2833
2834 // this fills in the uuid of the device we actually get
2835 hfsmp->jvp = open_journal_dev(dev_name,
2836 !(jib_flags & kJIJournalNeedInitMask),
2837 (char *)&jibp->ext_jnl_uuid[0],
2838 (char *)&jibp->machine_serial_num[0],
2839 jib_size,
2840 hfsmp->hfs_logical_block_size,
2841 &need_init);
2842 if (hfsmp->jvp == NULL) {
2843 buf_brelse(jinfo_bp);
2844 vnode_putname_printable(dev_name);
2845 return EROFS;
2846 } else {
2847 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2848 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2849 }
2850 }
2851 jib_offset = 0;
2852 recreate_journal = 1;
2853 write_jibp = 1;
2854 if (need_init) {
2855 jib_flags |= kJIJournalNeedInitMask;
2856 }
2857 vnode_putname_printable(dev_name);
2858 }
2859
2860 // save this off for the hack-y check in hfs_remove()
2861 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2862 hfsmp->jnl_size = jib_size;
2863
2864 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2865 // if the file system is read-only, check if the journal is empty.
2866 // if it is, then we can allow the mount. otherwise we have to
2867 // return failure.
2868 retval = journal_is_clean(hfsmp->jvp,
2869 jib_offset,
2870 jib_size,
2871 devvp,
2872 hfsmp->hfs_logical_block_size);
2873
2874 hfsmp->jnl = NULL;
2875
2876 buf_brelse(jinfo_bp);
2877
2878 if (retval) {
2879 const char *name = vnode_getname_printable(devvp);
2880 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2881 name);
2882 vnode_putname_printable(name);
2883 }
2884
2885 return retval;
2886 }
2887
2888 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2889 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2890 jib_offset, jib_size);
2891 hfsmp->jnl = journal_create(hfsmp->jvp,
2892 jib_offset,
2893 jib_size,
2894 devvp,
2895 hfsmp->hfs_logical_block_size,
2896 arg_flags,
2897 arg_tbufsz,
2898 hfs_sync_metadata, hfsmp->hfs_mp,
2899 hfsmp->hfs_mp);
2900 if (hfsmp->jnl)
2901 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2902
2903 // no need to start a transaction here... if this were to fail
2904 // we'd just re-init it on the next mount.
2905 jib_flags &= ~kJIJournalNeedInitMask;
2906 write_jibp = 1;
2907
2908 } else {
2909 //
2910 // if we weren't the last person to mount this volume
2911 // then we need to throw away the journal because it
2912 // is likely that someone else mucked with the disk.
2913 // if the journal is empty this is no big deal. if the
2914 // disk is dirty this prevents us from replaying the
2915 // journal over top of changes that someone else made.
2916 //
2917 arg_flags |= JOURNAL_RESET;
2918
2919 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2920 // jib_offset,
2921 // jib_size, SWAP_BE32(vhp->blockSize));
2922
2923 hfsmp->jnl = journal_open(hfsmp->jvp,
2924 jib_offset,
2925 jib_size,
2926 devvp,
2927 hfsmp->hfs_logical_block_size,
2928 arg_flags,
2929 arg_tbufsz,
2930 hfs_sync_metadata, hfsmp->hfs_mp,
2931 hfsmp->hfs_mp);
2932 if (hfsmp->jnl)
2933 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2934 }
2935
2936
2937 if (write_jibp) {
2938 jibp->flags = SWAP_BE32(jib_flags);
2939 jibp->offset = SWAP_BE64(jib_offset);
2940 jibp->size = SWAP_BE64(jib_size);
2941
2942 buf_bwrite(jinfo_bp);
2943 } else {
2944 buf_brelse(jinfo_bp);
2945 }
2946 jinfo_bp = NULL;
2947 jibp = NULL;
2948
2949 // if we expected the journal to be there and we couldn't
2950 // create it or open it then we have to bail out.
2951 if (hfsmp->jnl == NULL) {
2952 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2953 return EINVAL;
2954 }
2955
2956 return 0;
2957 }
2958
2959 /*
2960 * Calculate the allocation zone for metadata.
2961 *
2962 * This zone includes the following:
2963 * Allocation Bitmap file
2964 * Overflow Extents file
2965 * Journal file
2966 * Quota files
2967 * Clustered Hot files
2968 * Catalog file
2969 *
2970 * METADATA ALLOCATION ZONE
2971 * ____________________________________________________________________________
2972 * | | | | | | |
2973 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
2974 * |____|____|_____|_______________|______________________________|___________|
2975 *
2976 * <------------------------------- N * 128 MB ------------------------------->
2977 *
2978 */
2979 #define GIGABYTE (u_int64_t)(1024*1024*1024)
2980
2981 #define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2982 #define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2983 #define JOURNAL_DEFAULT_SIZE (8*1024*1024)
2984 #define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
2985 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
2986 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
2987
2988 /* Initialize the metadata zone.
2989 *
2990 * If the size of the volume is less than the minimum required for
2991 * a metadata zone, the metadata zone is disabled.
2992 *
2993 * If disable is true, the metadata zone is disabled unconditionally.
2994 */
2995 void
2996 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2997 {
2998 ExtendedVCB *vcb;
2999 u_int64_t fs_size;
3000 u_int64_t zonesize;
3001 u_int64_t temp;
3002 u_int64_t filesize;
3003 u_int32_t blk;
3004 int items, really_do_it=1;
3005
3006 vcb = HFSTOVCB(hfsmp);
3007 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
3008
3009 /*
3010 * For volumes less than 10 GB, don't bother.
3011 */
3012 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
3013 really_do_it = 0;
3014 }
3015
3016 /*
3017 * Skip non-journaled volumes as well.
3018 */
3019 if (hfsmp->jnl == NULL) {
3020 really_do_it = 0;
3021 }
3022
3023 /* If caller wants to disable metadata zone, do it */
3024 if (disable == true) {
3025 really_do_it = 0;
3026 }
3027
3028 /*
3029 * Start with space for the boot blocks and Volume Header.
3030 * 1536 = byte offset from start of volume to end of volume header:
3031 * 1024 bytes is the offset from the start of the volume to the
3032 * start of the volume header (defined by the volume format)
3033 * + 512 bytes (the size of the volume header).
3034 */
3035 zonesize = roundup(1536, hfsmp->blockSize);
3036
3037 /*
3038 * Add the on-disk size of allocation bitmap.
3039 */
3040 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3041
3042 /*
3043 * Add space for the Journal Info Block and Journal (if they're in
3044 * this file system).
3045 */
3046 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3047 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3048 }
3049
3050 /*
3051 * Add the existing size of the Extents Overflow B-tree.
3052 * (It rarely grows, so don't bother reserving additional room for it.)
3053 */
3054 zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3055
3056 /*
3057 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3058 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3059 * When installing a full OS install onto a 20GB volume, we use
3060 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3061 * us with another 3 or 4 clumps worth before we need another extent.
3062 */
3063 if (hfsmp->hfs_attribute_cp) {
3064 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3065 }
3066
3067 /*
3068 * Leave room for 11 clumps of the Catalog B-tree.
3069 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3070 * When installing a full OS install onto a 20GB volume, we use
3071 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3072 * us with another 3 or 4 clumps worth before we need another extent.
3073 */
3074 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3075
3076 /*
3077 * Add space for hot file region.
3078 *
3079 * ...for now, use 5 MB per 1 GB (0.5 %)
3080 */
3081 filesize = (fs_size / 1024) * 5;
3082 if (filesize > HOTBAND_MAXIMUM_SIZE)
3083 filesize = HOTBAND_MAXIMUM_SIZE;
3084 else if (filesize < HOTBAND_MINIMUM_SIZE)
3085 filesize = HOTBAND_MINIMUM_SIZE;
3086 /*
3087 * Calculate user quota file requirements.
3088 */
3089 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3090 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3091 if (items < QF_MIN_USERS)
3092 items = QF_MIN_USERS;
3093 else if (items > QF_MAX_USERS)
3094 items = QF_MAX_USERS;
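/* Round items up to the next power of two (e.g. 100 -> 128). */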
3095 if (!powerof2(items)) {
3096 int x = items;
3097 items = 4;
3098 while (x>>1 != 1) {
3099 x = x >> 1;
3100 items = items << 1;
3101 }
3102 }
3103 filesize += (items + 1) * sizeof(struct dqblk);
3104 /*
3105 * Calculate group quota file requirements.
3106 *
3107 */
3108 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3109 if (items < QF_MIN_GROUPS)
3110 items = QF_MIN_GROUPS;
3111 else if (items > QF_MAX_GROUPS)
3112 items = QF_MAX_GROUPS;
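/* Same power-of-two rounding as for the user quota file above. */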
3113 if (!powerof2(items)) {
3114 int x = items;
3115 items = 4;
3116 while (x>>1 != 1) {
3117 x = x >> 1;
3118 items = items << 1;
3119 }
3120 }
3121 filesize += (items + 1) * sizeof(struct dqblk);
3122 }
3123 zonesize += filesize;
3124
3125 /*
3126 * Round up entire zone to a bitmap block's worth.
3127 * The extra space goes to the catalog file and hot file area.
3128 */
3129 temp = zonesize;
3130 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3131 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3132 /*
3133 * If doing the round up for hfs_min_alloc_start would push us past
3134 * allocLimit, then just reset it back to 0. Though using a value
3135 * bigger than allocLimit would not cause damage in the block allocator
3136 * code, this value could get stored in the volume header and make it out
3137 * to disk, making the volume header technically corrupt.
3138 */
3139 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3140 hfsmp->hfs_min_alloc_start = 0;
3141 }
3142
3143 if (really_do_it == 0) {
3144 /* If metadata zone needs to be disabled because the
3145 * volume was truncated, clear the bit and zero out
3146 * the values that are no longer needed.
3147 */
3148 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3149 /* Disable metadata zone */
3150 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3151
3152 /* Zero out mount point values that are not required */
3153 hfsmp->hfs_catalog_maxblks = 0;
3154 hfsmp->hfs_hotfile_maxblks = 0;
3155 hfsmp->hfs_hotfile_start = 0;
3156 hfsmp->hfs_hotfile_end = 0;
3157 hfsmp->hfs_hotfile_freeblks = 0;
3158 hfsmp->hfs_metazone_start = 0;
3159 hfsmp->hfs_metazone_end = 0;
3160 }
3161
3162 return;
3163 }
3164
3165 temp = zonesize - temp; /* temp has extra space */
3166 filesize += temp / 3;
3167 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3168
3169 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3170
3171 /* Convert to allocation blocks. */
3172 blk = zonesize / vcb->blockSize;
3173
3174 /* The default metadata zone location is at the start of volume. */
3175 hfsmp->hfs_metazone_start = 1;
3176 hfsmp->hfs_metazone_end = blk - 1;
3177
3178 /* The default hotfile area is at the end of the zone. */
3179 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3180 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3181 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3182 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3183 }
3184 else {
3185 hfsmp->hfs_hotfile_start = 0;
3186 hfsmp->hfs_hotfile_end = 0;
3187 hfsmp->hfs_hotfile_freeblks = 0;
3188 }
3189 #if 0
3190 printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3191 printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3192 printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
3193 #endif
3194 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3195 }
3196
3197
3198 static u_int32_t
3199 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3200 {
3201 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3202 int lockflags;
3203 int freeblocks;
3204
3205 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3206 freeblocks = MetaZoneFreeBlocks(vcb);
3207 hfs_systemfile_unlock(hfsmp, lockflags);
3208
3209 /* Minus Extents overflow file reserve. */
3210 freeblocks -=
3211 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3212 /* Minus catalog file reserve. */
3213 freeblocks -=
3214 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3215 if (freeblocks < 0)
3216 freeblocks = 0;
3217
3218 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3219 }
3220
3221 /*
3222 * Determine if a file is a "virtual" metadata file.
3223 * This includes journal and quota files.
3224 */
3225 int
3226 hfs_virtualmetafile(struct cnode *cp)
3227 {
3228 const char * filename;
3229
3230
3231 if (cp->c_parentcnid != kHFSRootFolderID)
3232 return (0);
3233
3234 filename = (const char *)cp->c_desc.cd_nameptr;
3235 if (filename == NULL)
3236 return (0);
3237
3238 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3239 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3240 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3241 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3242 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3243 return (1);
3244
3245 return (0);
3246 }
3247
3248 __private_extern__
3249 void hfs_syncer_lock(struct hfsmount *hfsmp)
3250 {
3251 hfs_lock_mount(hfsmp);
3252 }
3253
3254 __private_extern__
3255 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3256 {
3257 hfs_unlock_mount(hfsmp);
3258 }
3259
3260 __private_extern__
3261 void hfs_syncer_wait(struct hfsmount *hfsmp)
3262 {
3263 msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
3264 "hfs_syncer_wait", NULL);
3265 }
3266
3267 __private_extern__
3268 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3269 {
3270 wakeup(&hfsmp->hfs_sync_incomplete);
3271 }
3272
3273 __private_extern__
3274 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3275 {
3276 uint64_t deadline;
3277 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3278 return deadline;
3279 }
3280
3281 __private_extern__
3282 void hfs_syncer_queue(thread_call_t syncer)
3283 {
3284 if (thread_call_enter_delayed_with_leeway(syncer,
3285 NULL,
3286 hfs_usecs_to_deadline(HFS_META_DELAY),
3287 0,
3288 THREAD_CALL_DELAY_SYS_BACKGROUND)) {
3289 printf("hfs: syncer already scheduled!\n");
3290 }
3291 }
3292
3293 //
3294 // Fire off a timed callback to sync the disk if the
3295 // volume is on ejectable media.
3296 //
3297 __private_extern__
3298 void
3299 hfs_sync_ejectable(struct hfsmount *hfsmp)
3300 {
3301 // If we don't have a syncer or we get called by the syncer, just return
3302 if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
3303 return;
3304
3305 hfs_syncer_lock(hfsmp);
3306
3307 if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3308 microuptime(&hfsmp->hfs_sync_req_oldest);
3309
3310 /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
3311 don't want to queue again if there is a sync outstanding. */
3312 if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
3313 hfs_syncer_unlock(hfsmp);
3314 return;
3315 }
3316
3317 hfsmp->hfs_sync_incomplete = TRUE;
3318
3319 thread_call_t syncer = hfsmp->hfs_syncer;
3320
3321 hfs_syncer_unlock(hfsmp);
3322
3323 hfs_syncer_queue(syncer);
3324 }
3325
3326 int
3327 hfs_start_transaction(struct hfsmount *hfsmp)
3328 {
3329 int ret = 0, unlock_on_err = 0;
3330 thread_t thread = current_thread();
3331
3332 #ifdef HFS_CHECK_LOCK_ORDER
3333 /*
3334 * You cannot start a transaction while holding a system
3335 * file lock. (unless the transaction is nested.)
3336 */
3337 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3338 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3339 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3340 }
3341 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3342 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3343 }
3344 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3345 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3346 }
3347 }
3348 #endif /* HFS_CHECK_LOCK_ORDER */
3349
3350 if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
3351 /*
3352 * The global lock should be held shared if journal is
3353 * active to prevent disabling. If we're not the owner
3354 * of the journal lock, verify that we're not already
3355 * holding the global lock exclusive before moving on.
3356 */
3357 if (hfsmp->hfs_global_lockowner == thread) {
3358 ret = EBUSY;
3359 goto out;
3360 }
3361
3362 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3363 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3364 unlock_on_err = 1;
3365 }
3366
3367 /* If a downgrade to read-only mount is in progress, no other
3368 * thread than the downgrade thread is allowed to modify
3369 * the file system.
3370 */
3371 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3372 hfsmp->hfs_downgrading_thread != thread) {
3373 ret = EROFS;
3374 goto out;
3375 }
3376
3377 if (hfsmp->jnl) {
3378 ret = journal_start_transaction(hfsmp->jnl);
3379 if (ret == 0) {
3380 OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
3381 }
3382 } else {
3383 ret = 0;
3384 }
3385
3386 out:
3387 if (ret != 0 && unlock_on_err) {
3388 hfs_unlock_global (hfsmp);
3389 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3390 }
3391
3392 return ret;
3393 }
3394
3395 int
3396 hfs_end_transaction(struct hfsmount *hfsmp)
3397 {
3398 int need_unlock=0, ret;
3399
3400 if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
3401 && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
3402 need_unlock = 1;
3403 }
3404
3405 if (hfsmp->jnl) {
3406 ret = journal_end_transaction(hfsmp->jnl);
3407 } else {
3408 ret = 0;
3409 }
3410
3411 if (need_unlock) {
3412 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3413 hfs_unlock_global (hfsmp);
3414 hfs_sync_ejectable(hfsmp);
3415 }
3416
3417 return ret;
3418 }
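
/*
 * Example (sketch): the canonical transaction pattern used throughout this
 * file -- start the transaction, take system file locks, modify, then
 * unlock and end:
 */
#if 0
	if (hfs_start_transaction(hfsmp) == 0) {
		int lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
		/* ... modify the catalog ... */
		hfs_systemfile_unlock(hfsmp, lockflags);
		hfs_end_transaction(hfsmp);
	}
#endif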
3419
3420
3421 void
3422 hfs_journal_lock(struct hfsmount *hfsmp)
3423 {
3424 /* Only peek at hfsmp->jnl while holding the global lock */
3425 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3426 if (hfsmp->jnl) {
3427 journal_lock(hfsmp->jnl);
3428 }
3429 hfs_unlock_global (hfsmp);
3430 }
3431
3432 void
3433 hfs_journal_unlock(struct hfsmount *hfsmp)
3434 {
3435 /* Only peek at hfsmp->jnl while holding the global lock */
3436 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3437 if (hfsmp->jnl) {
3438 journal_unlock(hfsmp->jnl);
3439 }
3440 hfs_unlock_global (hfsmp);
3441 }
3442
3443 /*
3444 * Flush the contents of the journal to the disk.
3445 *
3446 * Input:
3447 * wait_for_IO -
3448 * If TRUE, wait to write in-memory journal to the disk
3449 * consistently, and also wait to write all asynchronous
3450 * metadata blocks to their corresponding locations
3451 * consistently on the disk. This means that the journal
3452 * is empty at this point and does not contain any
3453 * transactions. This is overkill in normal scenarios
3454 * but is useful whenever the metadata blocks are required
3455 * to be consistent on-disk instead of just the journal
3456 * being consistent, such as before live verification
3457 * and live volume resizing.
3458 *
3459 * If FALSE, only wait to write in-memory journal to the
3460 * disk consistently. This means that the journal still
3461 * contains uncommitted transactions and the file system
3462 * metadata blocks in the journal transactions might be
3463 * written asynchronously to the disk. But there is no
3464 * guarantee that they are written to the disk before
3465 * returning to the caller. Note that this option is
3466 * sufficient for file system data integrity as it
3467 * guarantees consistent journal content on the disk.
3468 */
3469 int
3470 hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
3471 {
3472 int ret;
3473
3474 /* Only peek at hfsmp->jnl while holding the global lock */
3475 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3476 if (hfsmp->jnl) {
3477 ret = journal_flush(hfsmp->jnl, wait_for_IO);
3478 } else {
3479 ret = 0;
3480 }
3481 hfs_unlock_global (hfsmp);
3482
3483 return ret;
3484 }
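
/*
 * A sketch (compiled out, not part of the original code) of the two flush
 * modes documented above: wait_for_IO == FALSE makes only the journal
 * content durable, which is sufficient for file system integrity; TRUE
 * additionally drains the asynchronous metadata writes so the journal is
 * empty afterwards, as needed before live verification or live resizing.
 */
#if 0
static void
hfs_journal_flush_sketch(struct hfsmount *hfsmp)
{
	/* Cheap: on-disk journal is consistent; transactions may remain. */
	(void) hfs_journal_flush(hfsmp, FALSE);

	/* Expensive: journal drained, metadata blocks in final locations. */
	(void) hfs_journal_flush(hfsmp, TRUE);
}
#endif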
3485
3486
3487 /*
3488 * hfs_erase_unused_nodes
3489 *
3490 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3491 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3492 * zeroes to the unused nodes.
3493 *
3494 * How do we detect when a volume needs this repair? We can't always be
3495 * certain. If a volume was created after a certain date, then it may have
3496 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3497 * clump, if a Catalog B-tree is larger than its clump size, the
3498 * entire first clump must have been written to, which means
3499 * there shouldn't be unused and unwritten nodes in that first clump, and this
3500 * repair is not needed.
3501 *
3502 * We have defined a bit in the Volume Header's attributes to indicate when the
3503 * unused nodes have been repaired. A newer newfs_hfs will set this bit.
3504 * As will fsck_hfs when it repairs the unused nodes.
3505 */
3506 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3507 {
3508 int result;
3509 struct filefork *catalog;
3510 int lockflags;
3511
3512 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3513 {
3514 /* This volume has already been checked and repaired. */
3515 return 0;
3516 }
3517
3518 if (hfsmp->localCreateDate < kHFSUnusedNodesFixDate)
3519 {
3520 /* This volume is too old to have had the problem. */
3521 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3522 return 0;
3523 }
3524
3525 catalog = hfsmp->hfs_catalog_cp->c_datafork;
3526 if (catalog->ff_size > catalog->ff_clumpsize)
3527 {
3528 /* The entire first clump must have been in use at some point. */
3529 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3530 return 0;
3531 }
3532
3533 /*
3534 * If we get here, we need to zero out those unused nodes.
3535 *
3536 * We start a transaction and lock the catalog since we're going to be
3537 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
3538 * do its writing via the journal, because that would be too much I/O
3539 * to fit in a transaction, and it's a pain to break it up into multiple
3540 * transactions. (It behaves more like growing a B-tree would.)
3541 */
3542 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3543 result = hfs_start_transaction(hfsmp);
3544 if (result)
3545 goto done;
3546 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3547 result = BTZeroUnusedNodes(catalog);
3548 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3549 hfs_systemfile_unlock(hfsmp, lockflags);
3550 hfs_end_transaction(hfsmp);
3551 if (result == 0)
3552 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3553 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3554
3555 done:
3556 return result;
3557 }
3558
3559
3560 extern time_t snapshot_timestamp;
3561
3562 int
3563 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3564 {
3565 int snapshot_error = 0;
3566
3567 if (vp == NULL) {
3568 return 0;
3569 }
3570
3571 /* Swap files are special; skip them */
3572 if (vnode_isswap(vp)) {
3573 return 0;
3574 }
3575
3576 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3577 // the change time is within this epoch
3578 int error;
3579
3580 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3581 if (error == EDEADLK) {
3582 snapshot_error = 0;
3583 } else if (error) {
3584 if (error == EAGAIN) {
3585 printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3586 } else if (error == EINTR) {
3587 // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3588 snapshot_error = EINTR;
3589 }
3590 }
3591 }
3592
3593 if (snapshot_error) return snapshot_error;
3594
3595 return 0;
3596 }
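
/*
 * A sketch (compiled out, not part of the original code) of how a caller
 * might gate an operation on a tracked file. NAMESPACE_HANDLER_WRITE_OP
 * is the op_type used by the dataless path below and is shown only for
 * illustration; arg is passed through to the namespace handler, and NULL
 * here is a hypothetical choice.
 */
#if 0
static int
hfs_tracked_file_sketch(struct vnode *vp, time_t ctime)
{
	/* Returns EINTR if interrupted waiting for the namespace handler. */
	return check_for_tracked_file(vp, ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
}
#endif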
3597
3598 int
3599 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3600 {
3601 int error;
3602
3603 if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3604 // there's nothing to do, it's not dataless
3605 return 0;
3606 }
3607
3608 /* Swap files are special; ignore them */
3609 if (vnode_isswap(vp)) {
3610 return 0;
3611 }
3612
3613 // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3614 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3615 if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3616 error = 0;
3617 } else if (error) {
3618 if (error == EAGAIN) {
3619 printf("hfs: dataless: timed out waiting for namespace handler...\n");
3620 // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3621 return 0;
3622 } else if (error == EINTR) {
3623 // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3624 return EINTR;
3625 }
3626 } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3627 //
3628 // if we're here, the dataless bit is still set on the file
3629 // which means it didn't get handled. we return an error
3630 // but it's presently ignored by all callers of this function.
3631 //
3632 // XXXdbg - EDATANOTPRESENT is what we really need...
3633 //
3634 return EBADF;
3635 }
3636
3637 return error;
3638 }
3639
3640
3641 //
3642 // NOTE: this function takes care of starting a transaction and
3643 // acquiring the systemfile lock so that it can call
3644 // cat_update().
3645 //
3646 // NOTE: do NOT hold any cnode locks while calling this function
3647 // to avoid deadlocks (because we take a lock on the root
3648 // cnode)
3649 //
3650 int
3651 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3652 {
3653 struct vnode *rvp;
3654 struct cnode *cp;
3655 int error;
3656
3657 error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3658 if (error) {
3659 return error;
3660 }
3661
3662 cp = VTOC(rvp);
3663 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
/* drop the iocount taken by VFS_ROOT() before bailing out */
vnode_put(rvp);
3664 return error;
3665 }
3666 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3667
3668 int lockflags;
3669 if ((error = hfs_start_transaction(hfsmp)) != 0) {
/* on failure, release the cnode lock and the root-vnode iocount */
hfs_unlock(cp);
vnode_put(rvp);
3670 return error;
3671 }
3672 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3673
3674 if (extinfo->document_id == 0) {
3675 // initialize this to start at 3 (one greater than the root-dir id)
3676 extinfo->document_id = 3;
3677 }
3678
3679 *docid = extinfo->document_id++;
3680
3681 // mark the root cnode dirty
3682 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
3683 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
3684
3685 hfs_systemfile_unlock (hfsmp, lockflags);
3686 (void) hfs_end_transaction(hfsmp);
3687
3688 (void) hfs_unlock(cp);
3689
3690 vnode_put(rvp);
3691 rvp = NULL;
3692
3693 return 0;
3694 }
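
/*
 * A sketch (compiled out, not part of the original code) of consuming
 * hfs_generate_document_id(). Per the notes above, the caller must not
 * hold any cnode locks, since the function locks the root cnode itself.
 */
#if 0
static int
hfs_document_id_sketch(struct hfsmount *hfsmp)
{
	uint32_t docid;
	int error = hfs_generate_document_id(hfsmp, &docid);
	if (error == 0)
		printf("hfs: new document id %u\n", docid);
	return error;
}
#endif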
3695
3696
3697 /*
3698 * Return information about number of file system allocation blocks
3699 * taken by metadata on a volume.
3700 *
3701 * This function populates struct hfsinfo_metadata with allocation blocks
3702 * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3703 * journal file, and sum of all of the above.
3704 */
3705 int
3706 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3707 {
3708 int lockflags = 0;
3709 int ret_lockflags = 0;
3710
3711 /* Zero out the output buffer */
3712 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3713
3714 /*
3715 * Getting number of allocation blocks for all btrees
3716 * should be a quick operation, so we grab locks for
3717 * all of them at the same time
3718 */
3719 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3720 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3721 /*
3722 * Make sure that we were able to acquire all locks requested
3723 * to protect us against conditions like unmount in progress.
3724 */
3725 if ((lockflags & ret_lockflags) != lockflags) {
3726 /* Release any locks that were acquired */
3727 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3728 return EPERM;
3729 }
3730
3731 /* Get information about all the btrees */
3732 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3733 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3734 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3735 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3736
3737 /* Done with btrees, give up the locks */
3738 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3739
3740 /* Get information about journal file */
3741 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3742
3743 /* Calculate total number of metadata blocks */
3744 hinfo->total = hinfo->extents + hinfo->catalog +
3745 hinfo->allocation + hinfo->attribute +
3746 hinfo->journal;
3747
3748 return 0;
3749 }
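
/*
 * A sketch (compiled out, not part of the original code) of consuming
 * hfs_getinfo_metadata_blocks(). The counters are in allocation blocks,
 * so scale by the volume's allocation block size to get bytes.
 */
#if 0
static int
hfs_metadata_bytes_sketch(struct hfsmount *hfsmp, uint64_t *bytes)
{
	struct hfsinfo_metadata hinfo;
	int error = hfs_getinfo_metadata_blocks(hfsmp, &hinfo);
	if (error)
		return error;
	*bytes = (uint64_t)hinfo.total * hfsmp->blockSize;
	return 0;
}
#endif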
3750
3751 static int
3752 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3753 {
3754 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3755
3756 return 0;
3757 }
3758
3759 __private_extern__
3760 int hfs_freeze(struct hfsmount *hfsmp)
3761 {
3762 // First make sure some other process isn't freezing
3763 hfs_lock_mount(hfsmp);
3764 while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3765 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3766 PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3767 hfs_unlock_mount(hfsmp);
3768 return EINTR;
3769 }
3770 }
3771
3772 // Stop new syncers from starting
3773 hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3774
3775 // Now wait for all syncers to finish
3776 while (hfsmp->hfs_syncers) {
3777 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3778 PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
3779 hfs_thaw_locked(hfsmp);
3780 hfs_unlock_mount(hfsmp);
3781 return EINTR;
3782 }
3783 }
3784 hfs_unlock_mount(hfsmp);
3785
3786 // flush things before we get started to try and prevent
3787 // dirty data from being paged out while we're frozen.
3788 // note: we can't do this once we're in the freezing state because
3789 // other threads will need to take the global lock
3790 vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
3791
3792 // Block everything in hfs_lock_global now
3793 hfs_lock_mount(hfsmp);
3794 hfsmp->hfs_freeze_state = HFS_FREEZING;
3795 hfsmp->hfs_freezing_thread = current_thread();
3796 hfs_unlock_mount(hfsmp);
3797
3798 /* Take the exclusive lock to flush out anything else that
3799 might have the global lock at the moment and also so we
3800 can flush the journal. */
3801 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
if (hfsmp->jnl) {
3802 journal_flush(hfsmp->jnl, TRUE);
}
3803 hfs_unlock_global(hfsmp);
3804
3805 // don't need to iterate on all vnodes, we just need to
3806 // wait for writes to the system files and the device vnode
3807 //
3808 // Now that journal flush waits for all metadata blocks to
3809 // be written out, waiting for btree writes is probably no
3810 // longer required.
3811 if (HFSTOVCB(hfsmp)->extentsRefNum)
3812 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
3813 if (HFSTOVCB(hfsmp)->catalogRefNum)
3814 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
3815 if (HFSTOVCB(hfsmp)->allocationsRefNum)
3816 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
3817 if (hfsmp->hfs_attribute_vp)
3818 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
3819 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
3820
3821 // We're done, mark frozen
3822 hfs_lock_mount(hfsmp);
3823 hfsmp->hfs_freeze_state = HFS_FROZEN;
3824 hfsmp->hfs_freezing_proc = current_proc();
3825 hfs_unlock_mount(hfsmp);
3826
3827 return 0;
3828 }
3829
3830 __private_extern__
3831 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
3832 {
3833 hfs_lock_mount(hfsmp);
3834
3835 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
3836 hfs_unlock_mount(hfsmp);
3837 return EINVAL;
3838 }
3839 if (process && hfsmp->hfs_freezing_proc != process) {
3840 hfs_unlock_mount(hfsmp);
3841 return EPERM;
3842 }
3843
3844 hfs_thaw_locked(hfsmp);
3845
3846 hfs_unlock_mount(hfsmp);
3847
3848 return 0;
3849 }
3850
3851 static void hfs_thaw_locked(struct hfsmount *hfsmp)
3852 {
3853 hfsmp->hfs_freezing_proc = NULL;
3854 hfsmp->hfs_freeze_state = HFS_THAWED;
3855
3856 wakeup(&hfsmp->hfs_freeze_state);
3857 }
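
/*
 * A sketch (compiled out, not part of the original code) of pairing
 * hfs_freeze()/hfs_thaw(): freeze quiesces writers and flushes the
 * journal; the snapshot step is a hypothetical placeholder; and the thaw
 * must come from the freezing process (or pass NULL to force).
 */
#if 0
static int
hfs_freeze_thaw_sketch(struct hfsmount *hfsmp)
{
	int error = hfs_freeze(hfsmp);
	if (error)
		return error;	/* EINTR if interrupted while waiting */

	/* ... take a device-level snapshot here ... */

	return hfs_thaw(hfsmp, current_proc());
}
#endif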