X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e2fac8b15b12a7979f72090454d850e612fc5b13..143464d58d2bd6378e74eec636961ceb0d32fb91:/bsd/hfs/hfs_vfsutils.c diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index d6dc1e356..5fe09c2ed 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -45,10 +46,19 @@ #include #include #include +#include #include +#include #include +/* for parsing boot-args */ +#include + +#if CONFIG_PROTECT +#include +#endif + #include "hfs.h" #include "hfs_catalog.h" #include "hfs_dbg.h" @@ -64,9 +74,10 @@ static void ReleaseMetaFileVNode(struct vnode *vp); static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args); -static void hfs_metadatazone_init(struct hfsmount *); static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); +#define HFS_MOUNT_DEBUG 1 + //******************************************************************************* // Note: Finder information in the HFS/HFS+ metadata are considered opaque and @@ -85,8 +96,7 @@ unsigned char hfs_vbmname[] = "Volume Bitmap"; unsigned char hfs_attrname[] = "Attribute B-tree"; unsigned char hfs_startupname[] = "Startup File"; - -__private_extern__ +#if CONFIG_HFS_STD OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, __unused struct proc *p) { @@ -96,6 +106,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, struct cat_desc cndesc; struct cat_attr cnattr; struct cat_fork fork; + int newvnode_flags = 0; /* Block size must be a multiple of 512 */ if (SWAP_BE32(mdb->drAlBlkSiz) == 0 || @@ -114,7 +125,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, * */ vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord); - vcb->vcbCrDate = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate))); + vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate))); vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate); vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod))); vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb); @@ -143,8 +154,13 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, * When an HFS name cannot be encoded with the current * volume encoding we use MacRoman as a fallback. */ - if (error || (utf8chars == 0)) - (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN); + if (error || (utf8chars == 0)) { + error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN); + /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny the mount */ + if (error) { + goto MtVolErr; + } + } hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); vcb->vcbVBMIOSize = kHFSBlockSize; @@ -179,11 +195,19 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, cnattr.ca_blocks = fork.cf_blocks; error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, - &hfsmp->hfs_extents_vp); - if (error) goto MtVolErr; + &hfsmp->hfs_extents_vp, &newvnode_flags); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error); + } + goto MtVolErr; + } error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), (KeyCompareProcPtr)CompareExtentKeys)); if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error); + } hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } @@ -208,14 +232,20 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, cnattr.ca_blocks = fork.cf_blocks; error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, - &hfsmp->hfs_catalog_vp); + &hfsmp->hfs_catalog_vp, &newvnode_flags); if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error); + } hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), (KeyCompareProcPtr)CompareCatalogKeys)); if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error); + } hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; @@ -232,8 +262,11 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, cnattr.ca_blocks = 0; error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, - &hfsmp->hfs_allocation_vp); + &hfsmp->hfs_allocation_vp, &newvnode_flags); if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error); + } hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; @@ -243,44 +276,47 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; - if (error == noErr) - { - error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL); - } - - if ( error == noErr ) - { - if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected - { - MarkVCBDirty( vcb ); // mark VCB dirty so it will be written - } - } - + if (error == noErr) { + error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error); + } + } + + if (error == noErr) { + /* If the disk isn't write protected.. */ + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) { + MarkVCBDirty (vcb); // mark VCB dirty so it will be written + } + } + /* * all done with system files so we can unlock now... */ hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); - - goto CmdDone; + + if (error == noErr) { + /* If successful, then we can just return once we've unlocked the cnodes */ + return error; + } //-- Release any resources allocated so far before exiting with an error: MtVolErr: - ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); - ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); + hfsUnmount(hfsmp, NULL); -CmdDone: return (error); } +#endif + //******************************************************************************* // Routine: hfs_MountHFSPlusVolume // // //******************************************************************************* -__private_extern__ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred) { @@ -293,20 +329,24 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, struct BTreeInfoRec btinfo; u_int16_t signature; u_int16_t hfs_version; + int newvnode_flags = 0; int i; OSErr retval; + char converted_volname[256]; + size_t volname_length = 0; + size_t conv_volname_length = 0; signature = SWAP_BE16(vhp->signature); hfs_version = SWAP_BE16(vhp->version); if (signature == kHFSPlusSigWord) { if (hfs_version != kHFSPlusVersion) { - printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version); + printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version); return (EINVAL); } } else if (signature == kHFSXSigWord) { if (hfs_version != kHFSXVersion) { - printf("hfs_mount: invalid HFSX version: %d\n", hfs_version); + printf("hfs_mount: invalid HFSX version: %x\n", hfs_version); return (EINVAL); } /* The in-memory signature is always 'H+'. */ @@ -316,23 +356,38 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* Removed printf for invalid HFS+ signature because it gives * false error for UFS root volume */ + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature); + } return (EINVAL); } /* Block size must be at least 512 and a power of 2 */ blockSize = SWAP_BE32(vhp->blockSize); - if (blockSize < 512 || !powerof2(blockSize)) + if (blockSize < 512 || !powerof2(blockSize)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize); + } return (EINVAL); + } /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */ if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL && - (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) + (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n"); + } return (EINVAL); + } /* Make sure we can live with the physical block size. */ if ((disksize & (hfsmp->hfs_logical_block_size - 1)) || (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) || (blockSize < hfsmp->hfs_logical_block_size)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", + blockSize, hfsmp->hfs_logical_block_size); + } return (ENXIO); } @@ -437,9 +492,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, SWAP_BE32 (vhp->extentsFile.extents[i].blockCount); } retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, - &hfsmp->hfs_extents_vp); + &hfsmp->hfs_extents_vp, &newvnode_flags); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval); + } goto ErrorExit; } hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); @@ -449,6 +507,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, (KeyCompareProcPtr) CompareExtentKeysPlus)); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval); + } goto ErrorExit; } /* @@ -470,8 +531,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, SWAP_BE32 (vhp->catalogFile.extents[i].blockCount); } retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, - &hfsmp->hfs_catalog_vp); + &hfsmp->hfs_catalog_vp, &newvnode_flags); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval); + } goto ErrorExit; } hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp); @@ -480,6 +544,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), (KeyCompareProcPtr) CompareExtendedCatalogKeys)); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval); + } goto ErrorExit; } if ((hfsmp->hfs_flags & HFS_X) && @@ -511,8 +578,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, SWAP_BE32 (vhp->allocationFile.extents[i].blockCount); } retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, - &hfsmp->hfs_allocation_vp); + &hfsmp->hfs_allocation_vp, &newvnode_flags); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval); + } goto ErrorExit; } hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp); @@ -538,8 +608,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, SWAP_BE32 (vhp->attributesFile.extents[i].blockCount); } retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, - &hfsmp->hfs_attribute_vp); + &hfsmp->hfs_attribute_vp, &newvnode_flags); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval); + } goto ErrorExit; } hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp); @@ -547,6 +620,22 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp), (KeyCompareProcPtr) hfs_attrkeycompare)); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval); + } + goto ErrorExit; + } + + /* Initialize vnode for virtual attribute data file that spans the + * entire file system space for performing I/O to attribute btree + * We hold iocount on the attrdata vnode for the entire duration + * of mount (similar to btree vnodes) + */ + retval = init_attrdata_vnode(hfsmp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval); + } goto ErrorExit; } } @@ -571,23 +660,74 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, SWAP_BE32 (vhp->startupFile.extents[i].blockCount); } retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, - &hfsmp->hfs_startup_vp); + &hfsmp->hfs_startup_vp, &newvnode_flags); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval); + } goto ErrorExit; } hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp); hfs_unlock(hfsmp->hfs_startup_cp); } - /* Pick up volume name and create date */ - retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL); + /* + * Pick up volume name and create date + * + * Acquiring the volume name should not manipulate the bitmap, only the catalog + * btree and possibly the extents overflow b-tree. + */ + retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL); if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval); + } goto ErrorExit; } - vcb->vcbCrDate = cnattr.ca_itime; + vcb->hfs_itime = cnattr.ca_itime; vcb->volumeNameEncodingHint = cndesc.cd_encoding; bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen)); + volname_length = strlen ((const char*)vcb->vcbVN); cat_releasedesc(&cndesc); + +#define DKIOCCSSETLVNAME _IOW('d', 198, char[256]) + + + /* Send the volume name down to CoreStorage if necessary */ + retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED); + if (retval == 0) { + (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); + } + + /* reset retval == 0. we don't care about errors in volname conversion */ + retval = 0; + + + /* + * We now always initiate a full bitmap scan even if the volume is read-only because this is + * our only shot to do I/Os of dramaticallly different sizes than what the buffer cache ordinarily + * expects. TRIMs will not be delivered to the underlying media if the volume is not + * read-write though. + */ + thread_t allocator_scanner; + hfsmp->scan_var = 0; + + /* Take the HFS mount mutex and wait on scan_var */ + hfs_lock_mount (hfsmp); + + kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner); + /* Wait until it registers that it's got the appropriate locks */ + while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) { + (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0); + if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) { + break; + } + else { + hfs_lock_mount (hfsmp); + } + } + + thread_deallocate (allocator_scanner); /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; @@ -612,6 +752,17 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, retval = hfs_late_journal_init(hfsmp, vhp, args); if (retval != 0) { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval); + } + retval = EINVAL; + goto ErrorExit; + } + hfsmp->jnl = NULL; // if the journal failed to open, then set the lastMountedVersion @@ -626,6 +777,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize)); + bp = NULL; retval = (int)buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, cred, &bp); @@ -646,7 +798,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, bp = NULL; } } - + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval); + } retval = EINVAL; goto ErrorExit; } else if (hfsmp->jnl) { @@ -677,10 +832,108 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } } + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected + { + MarkVCBDirty( vcb ); // mark VCB dirty so it will be written + } + + /* + * Distinguish 3 potential cases involving content protection: + * 1. mount point bit set; vcbAtrb does not support it. Fail. + * 2. mount point bit set; vcbattrb supports it. we're good. + * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good. + */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Does the mount point support it ? */ + if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) { + /* Case 1 above */ + retval = EINVAL; + goto ErrorExit; + } + } + else { + /* not requested in the mount point. Is it in FS? */ + if (vcb->vcbAtrb & kHFSContentProtectionMask) { + /* Case 3 above */ + vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT); + } + } + + /* At this point, if the mount point flag is set, we can enable it. */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Cases 2+3 above */ +#if CONFIG_PROTECT + /* Get the EAs as needed. */ + int cperr = 0; + uint16_t majorversion; + uint16_t minorversion; + + struct cp_root_xattr *xattr = NULL; + MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); + if (xattr == NULL) { + retval = ENOMEM; + goto ErrorExit; + } + bzero (xattr, sizeof(struct cp_root_xattr)); + + /* go get the EA to get the version information */ + cperr = cp_getrootxattr (hfsmp, xattr); + /* + * If there was no EA there, then write one out. + * Assuming EA is not present on the root means + * this is an erase install or a very old FS + */ + + if (cperr == 0) { + /* Have to run a valid CP version. */ + if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) { + cperr = EINVAL; + } + } + else if (cperr == ENOATTR) { + printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS); + bzero(xattr, sizeof(struct cp_root_xattr)); + xattr->major_version = CP_NEW_MAJOR_VERS; + xattr->minor_version = CP_MINOR_VERS; + xattr->flags = 0; + cperr = cp_setrootxattr (hfsmp, xattr); + } + majorversion = xattr->major_version; + minorversion = xattr->minor_version; + if (xattr) { + FREE(xattr, M_TEMP); + } + + /* Recheck for good status */ + if (cperr == 0) { + /* If we got here, then the CP version is valid. Set it in the mount point */ + hfsmp->hfs_running_cp_major_vers = majorversion; + printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion); + + /* + * Acquire the boot-arg for the AKS default key. + * Ensure that the boot-arg's value is valid for FILES (not directories), + * since only files are actually protected for now. + */ + PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + hfsmp->default_cp_class = PROTECTION_CLASS_D; + } + } + else { + retval = EPERM; + goto ErrorExit; + } +#else + /* If CONFIG_PROTECT not built, ignore CP */ + vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT); +#endif + } + /* * Establish a metadata allocation zone. */ - hfs_metadatazone_init(hfsmp); + hfs_metadatazone_init(hfsmp, false); /* * Make any metadata zone adjustments. @@ -691,7 +944,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, vcb->nextAllocation <= hfsmp->hfs_metazone_end) { HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); } + } else { + if (vcb->nextAllocation <= 1) { + vcb->nextAllocation = hfsmp->hfs_min_alloc_start; + } } + vcb->sparseAllocation = hfsmp->hfs_min_alloc_start; /* Setup private/hidden directories for hardlinks. */ hfs_privatedir_init(hfsmp, FILE_HARDLINKS); @@ -700,38 +958,45 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) hfs_remove_orphans(hfsmp); - if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected + /* See if we need to erase unused Catalog nodes due to . */ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - MarkVCBDirty( vcb ); // mark VCB dirty so it will be written - } + retval = hfs_erase_unused_nodes(hfsmp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN); + } + goto ErrorExit; + } + } + /* * Allow hot file clustering if conditions allow. */ if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && - ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { + ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) { (void) hfs_recording_init(hfsmp); } /* Force ACLs on HFS+ file systems. */ vfs_setextendedsecurity(HFSTOVFS(hfsmp)); - /* Check if volume supports writing of extent-based extended attributes */ - hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE); + /* Enable extent-based extended attributes by default */ + hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; return (0); ErrorExit: /* - * A fatal error occurred and the volume cannot be mounted - * release any resources that we aquired... + * A fatal error occurred and the volume cannot be mounted, so + * release any resources that we acquired... */ - if (hfsmp->hfs_attribute_vp) - ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp); - ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); - ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); - ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); - + hfsUnmount(hfsmp, NULL); + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: encountered error (%d)\n", retval); + } return (retval); } @@ -747,7 +1012,7 @@ static void ReleaseMetaFileVNode(struct vnode *vp) if (vp && (fp = VTOF(vp))) { if (fp->fcbBTCBPtr != NULL) { - (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); (void) BTClosePath(fp); hfs_unlock(VTOC(vp)); } @@ -766,44 +1031,47 @@ static void ReleaseMetaFileVNode(struct vnode *vp) * *************************************************************/ -__private_extern__ int hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) { - /* Get rid of our attribute data vnode (if any). */ + /* Get rid of our attribute data vnode (if any). This is done + * after the vflush() during mount, so we don't need to worry + * about any locks. + */ if (hfsmp->hfs_attrdata_vp) { - vnode_t advp = hfsmp->hfs_attrdata_vp; - - if (vnode_get(advp) == 0) { - vnode_rele_ext(advp, O_EVTONLY, 0); - vnode_put(advp); - } + ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp); hfsmp->hfs_attrdata_vp = NULLVP; } - if (hfsmp->hfs_startup_vp) + if (hfsmp->hfs_startup_vp) { ReleaseMetaFileVNode(hfsmp->hfs_startup_vp); - - if (hfsmp->hfs_allocation_vp) - ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); - - if (hfsmp->hfs_attribute_vp) + hfsmp->hfs_startup_cp = NULL; + hfsmp->hfs_startup_vp = NULL; + } + + if (hfsmp->hfs_attribute_vp) { ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp); + hfsmp->hfs_attribute_cp = NULL; + hfsmp->hfs_attribute_vp = NULL; + } - ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); - ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); + if (hfsmp->hfs_catalog_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); + hfsmp->hfs_catalog_cp = NULL; + hfsmp->hfs_catalog_vp = NULL; + } - /* - * Setting these pointers to NULL so that any references - * past this point will fail, and tell us the point of failure. - * Also, facilitates a check in hfs_update for a null catalog - * vp - */ - hfsmp->hfs_allocation_vp = NULL; - hfsmp->hfs_attribute_vp = NULL; - hfsmp->hfs_catalog_vp = NULL; - hfsmp->hfs_extents_vp = NULL; - hfsmp->hfs_startup_vp = NULL; + if (hfsmp->hfs_extents_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); + hfsmp->hfs_extents_cp = NULL; + hfsmp->hfs_extents_vp = NULL; + } + + if (hfsmp->hfs_allocation_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); + hfsmp->hfs_allocation_cp = NULL; + hfsmp->hfs_allocation_vp = NULL; + } return (0); } @@ -811,12 +1079,16 @@ hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) /* * Test if fork has overflow extents. + * + * Returns: + * non-zero - overflow extents exist + * zero - overflow extents do not exist */ __private_extern__ int overflow_extents(struct filefork *fp) { - u_long blocks; + u_int32_t blocks; // // If the vnode pointer is NULL then we're being called @@ -849,19 +1121,83 @@ overflow_extents(struct filefork *fp) return (fp->ff_blocks > blocks); } +/* + * Lock the HFS global journal lock + */ +int +hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) +{ + void *thread = current_thread(); + + if (hfsmp->hfs_global_lockowner == thread) { + panic ("hfs_lock_global: locking against myself!"); + } + + /* HFS_SHARED_LOCK */ + if (locktype == HFS_SHARED_LOCK) { + lck_rw_lock_shared (&hfsmp->hfs_global_lock); + hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER; + } + /* HFS_EXCLUSIVE_LOCK */ + else { + lck_rw_lock_exclusive (&hfsmp->hfs_global_lock); + hfsmp->hfs_global_lockowner = thread; + } + + return 0; +} + + +/* + * Unlock the HFS global journal lock + */ +void +hfs_unlock_global (struct hfsmount *hfsmp) +{ + + void *thread = current_thread(); + + /* HFS_LOCK_EXCLUSIVE */ + if (hfsmp->hfs_global_lockowner == thread) { + hfsmp->hfs_global_lockowner = NULL; + lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock); + } + /* HFS_LOCK_SHARED */ + else { + lck_rw_unlock_shared (&hfsmp->hfs_global_lock); + } +} + +/* + * Lock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_lock_mount (struct hfsmount *hfsmp) { + lck_mtx_lock (&(hfsmp->hfs_mutex)); +} + +/* + * Unlock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_unlock_mount (struct hfsmount *hfsmp) { + lck_mtx_unlock (&(hfsmp->hfs_mutex)); +} /* * Lock HFS system file(s). */ -__private_extern__ int -hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype) +hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype) { /* * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file */ if (flags & SFL_CATALOG) { - #ifdef HFS_CHECK_LOCK_ORDER if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); @@ -874,19 +1210,24 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype } #endif /* HFS_CHECK_LOCK_ORDER */ - (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype); - /* - * When the catalog file has overflow extents then - * also acquire the extents b-tree lock if its not - * already requested. - */ - if ((flags & SFL_EXTENTS) == 0 && - overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) { - flags |= SFL_EXTENTS; + if (hfsmp->hfs_catalog_cp) { + (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the catalog file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_catalog_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_CATALOG; } } - if (flags & SFL_ATTRIBUTE) { + if (flags & SFL_ATTRIBUTE) { #ifdef HFS_CHECK_LOCK_ORDER if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); @@ -897,20 +1238,22 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype #endif /* HFS_CHECK_LOCK_ORDER */ if (hfsmp->hfs_attribute_cp) { - (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype); + (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT); /* * When the attribute file has overflow extents then * also acquire the extents b-tree lock if its not * already requested. */ - if ((flags & SFL_EXTENTS) == 0 && - overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) { + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_attribute_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) { flags |= SFL_EXTENTS; } } else { flags &= ~SFL_ATTRIBUTE; } } + if (flags & SFL_STARTUP) { #ifdef HFS_CHECK_LOCK_ORDER if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { @@ -918,54 +1261,59 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype } #endif /* HFS_CHECK_LOCK_ORDER */ - (void) hfs_lock(hfsmp->hfs_startup_cp, locktype); - /* - * When the startup file has overflow extents then - * also acquire the extents b-tree lock if its not - * already requested. - */ - if ((flags & SFL_EXTENTS) == 0 && - overflow_extents(VTOF(hfsmp->hfs_startup_vp))) { - flags |= SFL_EXTENTS; + if (hfsmp->hfs_startup_cp) { + (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the startup file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_startup_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_STARTUP; } } + /* * To prevent locks being taken in the wrong order, the extent lock * gets a bitmap lock as well. */ if (flags & (SFL_BITMAP | SFL_EXTENTS)) { - /* - * Since the only bitmap operations are clearing and - * setting bits we always need exclusive access. And - * when we have a journal, we can "hide" behind that - * lock since we can only change the bitmap from - * within a transaction. - */ - if (hfsmp->jnl || (hfsmp->hfs_allocation_cp == NULL)) { - flags &= ~SFL_BITMAP; - } else { - (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK); - /* The bitmap lock is also grabbed when only extent lock + if (hfsmp->hfs_allocation_cp) { + (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + /* + * The bitmap lock is also grabbed when only extent lock * was requested. Set the bitmap lock bit in the lock * flags which callers will use during unlock. */ flags |= SFL_BITMAP; + } else { + flags &= ~SFL_BITMAP; } } + if (flags & SFL_EXTENTS) { /* * Since the extents btree lock is recursive we always * need exclusive access. */ - (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK); + if (hfsmp->hfs_extents_cp) { + (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } else { + flags &= ~SFL_EXTENTS; + } } + return (flags); } /* * unlock HFS system file(s). */ -__private_extern__ void hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) { @@ -992,7 +1340,7 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) } hfs_unlock(hfsmp->hfs_attribute_cp); } - if (flags & SFL_CATALOG) { + if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) { if (hfsmp->jnl == NULL) { BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync); numOfLockedBuffs = count_lock_queue(); @@ -1004,10 +1352,10 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) } hfs_unlock(hfsmp->hfs_catalog_cp); } - if (flags & SFL_BITMAP) { + if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) { hfs_unlock(hfsmp->hfs_allocation_cp); } - if (flags & SFL_EXTENTS) { + if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) { if (hfsmp->jnl == NULL) { BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync); numOfLockedBuffs = count_lock_queue(); @@ -1044,20 +1392,20 @@ void RequireFileLock(FileReference vp, int shareable) if (!locked && !shareable) { switch (VTOC(vp)->c_fileid) { case kHFSExtentsFileID: - panic("extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); break; case kHFSCatalogFileID: - panic("catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); break; case kHFSAllocationFileID: /* The allocation file can hide behind the jornal lock. */ if (VTOHFS(vp)->jnl == NULL) - panic("allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); break; case kHFSStartupFileID: - panic("startup file not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp); case kHFSAttributesFileID: - panic("attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp); break; } } @@ -1094,15 +1442,15 @@ hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, } -unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, - unsigned long blockSizeLimit, - unsigned long baseMultiple) { +u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, + u_int32_t blockSizeLimit, + u_int32_t baseMultiple) { /* Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the specified limit but still an even multiple of the baseMultiple. */ int baseBlockCount, blockCount; - unsigned long trialBlockSize; + u_int32_t trialBlockSize; if (allocationBlockSize % baseMultiple != 0) { /* @@ -1137,8 +1485,7 @@ unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, } -__private_extern__ -u_long +u_int32_t GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, struct cat_attr *fattr, struct cat_fork *forkinfo) { @@ -1158,7 +1505,7 @@ GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, jdesc.cd_namelen = strlen(name); lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL); + error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL); hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { @@ -1177,7 +1524,6 @@ GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, * If the volume was not cleanly unmounted then some of these may * have persisted and need to be removed. */ -__private_extern__ void hfs_remove_orphans(struct hfsmount * hfsmp) { @@ -1197,7 +1543,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp) int started_tr = 0; int lockflags; int result; - int orphanedlinks = 0; + int orphaned_files = 0; + int orphaned_dirs = 0; bzero(&cookie, sizeof(cookie)); @@ -1254,8 +1601,9 @@ hfs_remove_orphans(struct hfsmount * hfsmp) */ if (bcmp(tempname, filename, namelen) == 0) { struct filefork dfork; - struct filefork rfork; + struct filefork rfork; struct cnode cnode; + int mode = 0; bzero(&dfork, sizeof(dfork)); bzero(&rfork, sizeof(rfork)); @@ -1312,8 +1660,9 @@ hfs_remove_orphans(struct hfsmount * hfsmp) fsize = 0; } - if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) { - printf("error truncting data fork!\n"); + if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, + cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating data fork!\n"); break; } @@ -1344,8 +1693,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp) rfork.ff_cp = &cnode; cnode.c_datafork = NULL; cnode.c_rsrcfork = &rfork; - if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) { - printf("error truncting rsrc fork!\n"); + if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating rsrc fork!\n"); break; } } @@ -1358,11 +1707,19 @@ hfs_remove_orphans(struct hfsmount * hfsmp) hfs_volupdate(hfsmp, VOL_UPDATE, 0); break; } - ++orphanedlinks; + + mode = cnode.c_attr.ca_mode & S_IFMT; + + if (mode == S_IFDIR) { + orphaned_dirs++; + } + else { + orphaned_files++; + } /* Update parent and volume counts */ hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; - if (cnode.c_attr.ca_mode & S_IFDIR) { + if (mode == S_IFDIR) { DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); } @@ -1378,7 +1735,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp) Now that Catalog is unlocked, update the volume info, making sure to differentiate between files and directories */ - if (cnode.c_attr.ca_mode & S_IFDIR) { + if (mode == S_IFDIR) { hfs_volupdate(hfsmp, VOL_RMDIR, 0); } else{ @@ -1392,8 +1749,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp) } /* end if */ } /* end for */ - if (orphanedlinks > 0) - printf("HFS: Removed %d orphaned unlinked files or directories \n", orphanedlinks); + if (orphaned_files > 0 || orphaned_dirs > 0) + printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs); exit: if (catlock) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -1451,7 +1808,6 @@ u_int32_t logBlockSize; return logBlockSize; } -__private_extern__ u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) { @@ -1462,7 +1818,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) /* * We don't bother taking the mount lock * to look at these values since the values - * themselves are each updated automically + * themselves are each updated atomically * on aligned addresses. */ freeblks = hfsmp->freeBlocks; @@ -1479,7 +1835,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) else freeblks = 0; -#ifdef HFS_SPARSE_DEV +#if HFS_SPARSE_DEV /* * When the underlying device is sparse, check the * available space on the backing store volume. @@ -1500,7 +1856,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) } if ((vfsp = vfs_statfs(backingfs_mp))) { - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); vfreeblks = vfsp->f_bavail; /* Normalize block count if needed. */ if (vfsp->f_bsize != hfsmp->blockSize) { @@ -1518,11 +1874,27 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) else vfreeblks = 0; + if (hfsmp->hfs_backingfs_maxblocks) { + vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks); + } freeblks = MIN(vfreeblks, freeblks); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); } } #endif /* HFS_SPARSE_DEV */ + if (hfsmp->hfs_flags & HFS_CS) { + uint64_t cs_free_bytes; + uint64_t cs_free_blks; + if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES, + (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) { + cs_free_blks = cs_free_bytes / hfsmp->blockSize; + if (cs_free_blks > loanblks) + cs_free_blks -= loanblks; + else + cs_free_blks = 0; + freeblks = MIN(cs_free_blks, freeblks); + } + } return (freeblks); } @@ -1536,6 +1908,12 @@ short MacToVFSError(OSErr err) if (err >= 0) return err; + /* BSD/VFS internal errnos */ + switch (err) { + case ERESERVEDNAME: /* -8 */ + return err; + } + switch (err) { case dskFulErr: /* -34 */ case btNoSpaceAvail: /* -32733 */ @@ -1772,7 +2150,130 @@ out: } -__private_extern__ +typedef struct jopen_cb_info { + off_t jsize; + char *desired_uuid; + struct vnode *jvp; + size_t blksize; + int need_clean; + int need_init; +} jopen_cb_info; + +static int +journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg) +{ + struct nameidata nd; + jopen_cb_info *ji = (jopen_cb_info *)arg; + char bsd_name[256]; + int error; + + strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name)); + strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5); + + if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) { + return 1; // keep iterating + } + + // if we're here, either the desired uuid matched or there was no + // desired uuid so let's try to open the device for writing and + // see if it works. if it does, we'll use it. + + NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel()); + if ((error = namei(&nd))) { + printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str); + return 1; // keep iterating + } + + ji->jvp = nd.ni_vp; + nameidone(&nd); + + if (ji->jvp == NULL) { + printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error); + } else { + error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel()); + if (error == 0) { + // if the journal is dirty and we didn't specify a desired + // journal device uuid, then do not use the journal. but + // if the journal is just invalid (e.g. it hasn't been + // initialized) then just set the need_init flag. + if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') { + error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize); + if (error == EBUSY) { + VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel()); + vnode_put(ji->jvp); + ji->jvp = NULL; + return 1; // keep iterating + } else if (error == EINVAL) { + ji->need_init = 1; + } + } + + if (ji->desired_uuid && ji->desired_uuid[0] == '\0') { + strlcpy(ji->desired_uuid, uuid_str, 128); + } + vnode_setmountedon(ji->jvp); + return 0; // stop iterating + } else { + vnode_put(ji->jvp); + ji->jvp = NULL; + } + } + + return 1; // keep iterating +} + +extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg); +kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len); + + +static vnode_t +open_journal_dev(const char *vol_device, + int need_clean, + char *uuid_str, + char *machine_serial_num, + off_t jsize, + size_t blksize, + int *need_init) +{ + int retry_counter=0; + jopen_cb_info ji; + + ji.jsize = jsize; + ji.desired_uuid = uuid_str; + ji.jvp = NULL; + ji.blksize = blksize; + ji.need_clean = need_clean; + ji.need_init = 0; + +// if (uuid_str[0] == '\0') { +// printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device); +// } else { +// printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str); +// } + while (ji.jvp == NULL && retry_counter++ < 4) { + if (retry_counter > 1) { + if (uuid_str[0]) { + printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str); + } else { + printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n"); + } + delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again + } + + IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji); + } + + if (ji.jvp == NULL) { + printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n", + vol_device, uuid_str, machine_serial_num); + } + + *need_init = ji.need_init; + + return ji.jvp; +} + + int hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args, off_t embeddedOffset, daddr64_t mdb_offset, @@ -1781,15 +2282,17 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, JournalInfoBlock *jibp; struct buf *jinfo_bp, *bp; int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0; - int retval; + int retval, write_jibp = 0; uint32_t blksize = hfsmp->hfs_logical_block_size; struct vnode *devvp; struct hfs_mount_args *args = _args; u_int32_t jib_flags; u_int64_t jib_offset; u_int64_t jib_size; + const char *dev_name; devvp = hfsmp->hfs_devvp; + dev_name = vnode_getname_printable(devvp); if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) { arg_flags = args->journal_flags; @@ -1798,24 +2301,57 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize; + jinfo_bp = NULL; retval = (int)buf_meta_bread(devvp, (daddr64_t)((embeddedOffset/blksize) + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), - SWAP_BE32(vhp->blockSize), cred, &jinfo_bp); - if (retval) - return retval; - + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, cred, &jinfo_bp); + if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } + goto cleanup_dev_name; + } + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); jib_flags = SWAP_BE32(jibp->flags); - jib_offset = SWAP_BE64(jibp->offset); jib_size = SWAP_BE64(jibp->size); if (jib_flags & kJIJournalInFSMask) { hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset = SWAP_BE64(jibp->offset); } else { - printf("hfs: journal not stored in fs! don't know what to do.\n"); - buf_brelse(jinfo_bp); - return EINVAL; + int need_init=0; + + // if the volume was unmounted cleanly then we'll pick any + // available external journal partition + // + if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) { + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + } + + hfsmp->jvp = open_journal_dev(dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { + buf_brelse(jinfo_bp); + retval = EROFS; + goto cleanup_dev_name; + } else { + if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num)); + } + } + + jib_offset = 0; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } } // save this off for the hack-y check in hfs_remove() @@ -1827,21 +2363,23 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, // if it is, then we can allow the mount. otherwise we have to // return failure. retval = journal_is_clean(hfsmp->jvp, - jib_offset + embeddedOffset, + jib_offset + embeddedOffset, jib_size, devvp, - hfsmp->hfs_logical_block_size); + hfsmp->hfs_logical_block_size); hfsmp->jnl = NULL; buf_brelse(jinfo_bp); if (retval) { - printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", - vnode_name(devvp)); + const char *name = vnode_getname_printable(devvp); + printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); } - return retval; + goto cleanup_dev_name; } if (jib_flags & kJIJournalNeedInitMask) { @@ -1854,7 +2392,10 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, blksize, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); // no need to start a transaction here... if this were to fail // we'd just re-init it on the next mount. @@ -1875,9 +2416,16 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, blksize, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); - buf_brelse(jinfo_bp); + if (write_jibp) { + buf_bwrite(jinfo_bp); + } else { + buf_brelse(jinfo_bp); + } jinfo_bp = NULL; jibp = NULL; @@ -1887,14 +2435,17 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (mdb_offset == 0) { mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); } + bp = NULL; retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, cred, &bp); if (retval) { - buf_brelse(bp); + if (bp) { + buf_brelse(bp); + } printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n", retval); - return retval; + goto cleanup_dev_name; } bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512); buf_brelse(bp); @@ -1902,17 +2453,19 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } } - - //printf("journal @ 0x%x\n", hfsmp->jnl); - // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. if (hfsmp->jnl == NULL) { printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval); - return EINVAL; + retval = EINVAL; + goto cleanup_dev_name; } - return 0; + retval = 0; + +cleanup_dev_name: + vnode_putname_printable(dev_name); + return retval; } @@ -1941,7 +2494,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a struct cat_attr jib_attr, jattr; struct cat_fork jib_fork, jfork; ExtendedVCB *vcb; - u_long fid; + u_int32_t fid; struct hfs_mount_args *args = _args; u_int32_t jib_flags; u_int64_t jib_offset; @@ -1980,11 +2533,15 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size; + jinfo_bp = NULL; retval = (int)buf_meta_bread(devvp, - (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), - SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp); + (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp); if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } printf("hfs: can't read journal info block. disabling journaling.\n"); vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; return 0; @@ -2006,7 +2563,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a hfsmp->hfs_jnlfileid = fid; // make sure the journal file begins where we think it should. - if ((jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { + if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n", (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock); @@ -2027,10 +2584,40 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a if (jib_flags & kJIJournalInFSMask) { hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset += (off_t)vcb->hfsPlusIOPosOffset; } else { - printf("hfs: journal not stored in fs! don't know what to do.\n"); - buf_brelse(jinfo_bp); - return EINVAL; + const char *dev_name; + int need_init = 0; + + dev_name = vnode_getname_printable(devvp); + + // since the journal is empty, just use any available external journal + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + + // this fills in the uuid of the device we actually get + hfsmp->jvp = open_journal_dev(dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { + buf_brelse(jinfo_bp); + vnode_putname_printable(dev_name); + return EROFS; + } else { + if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num)); + } + } + jib_offset = 0; + recreate_journal = 1; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } + vnode_putname_printable(dev_name); } // save this off for the hack-y check in hfs_remove() @@ -2042,7 +2629,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a // if it is, then we can allow the mount. otherwise we have to // return failure. retval = journal_is_clean(hfsmp->jvp, - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + jib_offset, jib_size, devvp, hfsmp->hfs_logical_block_size); @@ -2052,8 +2639,10 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a buf_brelse(jinfo_bp); if (retval) { - printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", - vnode_name(devvp)); + const char *name = vnode_getname_printable(devvp); + printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); } return retval; @@ -2061,15 +2650,18 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) { printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, jib_size); + jib_offset, jib_size); hfsmp->jnl = journal_create(hfsmp->jvp, - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + jib_offset, jib_size, devvp, hfsmp->hfs_logical_block_size, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); // no need to start a transaction here... if this were to fail // we'd just re-init it on the next mount. @@ -2088,17 +2680,20 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a arg_flags |= JOURNAL_RESET; //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", - // jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + // jib_offset, // jib_size, SWAP_BE32(vhp->blockSize)); hfsmp->jnl = journal_open(hfsmp->jvp, - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + jib_offset, jib_size, devvp, hfsmp->hfs_logical_block_size, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); } @@ -2114,8 +2709,6 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a jinfo_bp = NULL; jibp = NULL; - //printf("journal @ 0x%x\n", hfsmp->jnl); - // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. if (hfsmp->jnl == NULL) { @@ -2155,8 +2748,15 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a #define HOTBAND_MINIMUM_SIZE (10*1024*1024) #define HOTBAND_MAXIMUM_SIZE (512*1024*1024) -static void -hfs_metadatazone_init(struct hfsmount *hfsmp) +/* Initialize the metadata zone. + * + * If the size of the volume is less than the minimum size for + * metadata zone, metadata zone is disabled. + * + * If disable is true, disable metadata zone unconditionally. + */ +void +hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) { ExtendedVCB *vcb; u_int64_t fs_size; @@ -2164,58 +2764,78 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) u_int64_t temp; u_int64_t filesize; u_int32_t blk; - int items; + int items, really_do_it=1; vcb = HFSTOVCB(hfsmp); - fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->totalBlocks; + fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit; /* * For volumes less than 10 GB, don't bother. */ - if (fs_size < ((u_int64_t)10 * GIGABYTE)) - return; + if (fs_size < ((u_int64_t)10 * GIGABYTE)) { + really_do_it = 0; + } + /* * Skip non-journaled volumes as well. */ - if (hfsmp->jnl == NULL) - return; + if (hfsmp->jnl == NULL) { + really_do_it = 0; + } + + /* If caller wants to disable metadata zone, do it */ + if (disable == true) { + really_do_it = 0; + } /* - * Start with allocation bitmap (a fixed size). + * Start with space for the boot blocks and Volume Header. + * 1536 = byte offset from start of volume to end of volume header: + * 1024 bytes is the offset from the start of the volume to the + * start of the volume header (defined by the volume format) + * + 512 bytes (the size of the volume header). */ - zonesize = roundup(vcb->totalBlocks / 8, vcb->vcbVBMIOSize); - + zonesize = roundup(1536, hfsmp->blockSize); + /* - * Overflow Extents file gets 4 MB per 100 GB. + * Add the on-disk size of allocation bitmap. */ - items = fs_size / ((u_int64_t)100 * GIGABYTE); - filesize = (u_int64_t)(items + 1) * OVERFLOW_DEFAULT_SIZE; - if (filesize > OVERFLOW_MAXIMUM_SIZE) - filesize = OVERFLOW_MAXIMUM_SIZE; - zonesize += filesize; - hfsmp->hfs_overflow_maxblks = filesize / vcb->blockSize; - + zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize; + + /* + * Add space for the Journal Info Block and Journal (if they're in + * this file system). + */ + if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) { + zonesize += hfsmp->blockSize + hfsmp->jnl_size; + } + /* - * Plan for at least 8 MB of journal for each - * 100 GB of disk space (up to a 512 MB). + * Add the existing size of the Extents Overflow B-tree. + * (It rarely grows, so don't bother reserving additional room for it.) */ - items = fs_size / ((u_int64_t)100 * GIGABYTE); - filesize = (u_int64_t)(items + 1) * JOURNAL_DEFAULT_SIZE; - if (filesize > JOURNAL_MAXIMUM_SIZE) - filesize = JOURNAL_MAXIMUM_SIZE; - zonesize += filesize; - + zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize; + /* - * Catalog file gets 10 MB per 1 GB. - * - * How about considering the current catalog size (used nodes * node size) - * and the current file data size to help estimate the required - * catalog size. + * If there is an Attributes B-tree, leave room for 11 clumps worth. + * newfs_hfs allocates one clump, and leaves a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. */ - filesize = MIN((fs_size / 1024) * 10, GIGABYTE); - hfsmp->hfs_catalog_maxblks = filesize / vcb->blockSize; - zonesize += filesize; - + if (hfsmp->hfs_attribute_cp) { + zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize; + } + + /* + * Leave room for 11 clumps of the Catalog B-tree. + * Again, newfs_hfs allocates one clump plus a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. + */ + zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize; + /* * Add space for hot file region. * @@ -2229,38 +2849,40 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) /* * Calculate user quota file requirements. */ - items = QF_USERS_PER_GB * (fs_size / GIGABYTE); - if (items < QF_MIN_USERS) - items = QF_MIN_USERS; - else if (items > QF_MAX_USERS) - items = QF_MAX_USERS; - if (!powerof2(items)) { - int x = items; - items = 4; - while (x>>1 != 1) { - x = x >> 1; - items = items << 1; - } - } - filesize += (items + 1) * sizeof(struct dqblk); - /* - * Calculate group quota file requirements. - * - */ - items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE); - if (items < QF_MIN_GROUPS) - items = QF_MIN_GROUPS; - else if (items > QF_MAX_GROUPS) - items = QF_MAX_GROUPS; - if (!powerof2(items)) { - int x = items; - items = 4; - while (x>>1 != 1) { - x = x >> 1; - items = items << 1; - } - } - filesize += (items + 1) * sizeof(struct dqblk); + if (hfsmp->hfs_flags & HFS_QUOTAS) { + items = QF_USERS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_USERS) + items = QF_MIN_USERS; + else if (items > QF_MAX_USERS) + items = QF_MAX_USERS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } + } + filesize += (items + 1) * sizeof(struct dqblk); + /* + * Calculate group quota file requirements. + * + */ + items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_GROUPS) + items = QF_MIN_GROUPS; + else if (items > QF_MAX_GROUPS) + items = QF_MAX_GROUPS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } + } + filesize += (items + 1) * sizeof(struct dqblk); + } zonesize += filesize; /* @@ -2269,6 +2891,40 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) */ temp = zonesize; zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize); + hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize; + /* + * If doing the round up for hfs_min_alloc_start would push us past + * allocLimit, then just reset it back to 0. Though using a value + * bigger than allocLimit would not cause damage in the block allocator + * code, this value could get stored in the volume header and make it out + * to disk, making the volume header technically corrupt. + */ + if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) { + hfsmp->hfs_min_alloc_start = 0; + } + + if (really_do_it == 0) { + /* If metadata zone needs to be disabled because the + * volume was truncated, clear the bit and zero out + * the values that are no longer needed. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* Disable metadata zone */ + hfsmp->hfs_flags &= ~HFS_METADATA_ZONE; + + /* Zero out mount point values that are not required */ + hfsmp->hfs_catalog_maxblks = 0; + hfsmp->hfs_hotfile_maxblks = 0; + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + hfsmp->hfs_metazone_start = 0; + hfsmp->hfs_metazone_end = 0; + } + + return; + } + temp = zonesize - temp; /* temp has extra space */ filesize += temp / 3; hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; @@ -2283,13 +2939,20 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) hfsmp->hfs_metazone_end = blk - 1; /* The default hotfile area is at the end of the zone. */ - hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize); - hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; - hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) { + hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize); + hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; + hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); + } + else { + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + } #if 0 - printf("HFS: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); - printf("HFS: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); - printf("HFS: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks); + printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); + printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); + printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks); #endif hfsmp->hfs_flags |= HFS_METADATA_ZONE; } @@ -2322,7 +2985,6 @@ hfs_hotfile_freeblocks(struct hfsmount *hfsmp) * Determine if a file is a "virtual" metadata file. * This includes journal and quota files. */ -__private_extern__ int hfs_virtualmetafile(struct cnode *cp) { @@ -2346,7 +3008,50 @@ hfs_virtualmetafile(struct cnode *cp) return (0); } +__private_extern__ +void hfs_syncer_lock(struct hfsmount *hfsmp) +{ + hfs_lock_mount(hfsmp); +} + +__private_extern__ +void hfs_syncer_unlock(struct hfsmount *hfsmp) +{ + hfs_unlock_mount(hfsmp); +} + +__private_extern__ +void hfs_syncer_wait(struct hfsmount *hfsmp) +{ + msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT, + "hfs_syncer_wait", NULL); +} + +__private_extern__ +void hfs_syncer_wakeup(struct hfsmount *hfsmp) +{ + wakeup(&hfsmp->hfs_sync_incomplete); +} + +__private_extern__ +uint64_t hfs_usecs_to_deadline(uint64_t usecs) +{ + uint64_t deadline; + clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline); + return deadline; +} +__private_extern__ +void hfs_syncer_queue(thread_call_t syncer) +{ + if (thread_call_enter_delayed_with_leeway(syncer, + NULL, + hfs_usecs_to_deadline(HFS_META_DELAY), + 0, + THREAD_CALL_DELAY_SYS_BACKGROUND)) { + printf ("hfs: syncer already scheduled!"); + } +} // // Fire off a timed callback to sync the disk if the @@ -2356,38 +3061,31 @@ hfs_virtualmetafile(struct cnode *cp) void hfs_sync_ejectable(struct hfsmount *hfsmp) { - if (hfsmp->hfs_syncer) { - uint32_t secs, usecs; - uint64_t now; + // If we don't have a syncer or we get called by the syncer, just return + if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread) + return; - clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000) + usecs; + hfs_syncer_lock(hfsmp); - if (hfsmp->hfs_sync_scheduled == 0) { - uint64_t deadline; + if (!timerisset(&hfsmp->hfs_sync_req_oldest)) + microuptime(&hfsmp->hfs_sync_req_oldest); - hfsmp->hfs_last_sync_request_time = now; + /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we + don't want to queue again if there is a sync outstanding. */ + if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) { + hfs_syncer_unlock(hfsmp); + return; + } - clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline); + hfsmp->hfs_sync_incomplete = TRUE; - /* - * Increment hfs_sync_scheduled on the assumption that we're the - * first thread to schedule the timer. If some other thread beat - * us, then we'll decrement it. If we *were* the first to - * schedule the timer, then we need to keep track that the - * callback is waiting to complete. - */ - OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); - if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline)) - OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); - else - OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); - } - } -} + thread_call_t syncer = hfsmp->hfs_syncer; + hfs_syncer_unlock(hfsmp); + + hfs_syncer_queue(syncer); +} -__private_extern__ int hfs_start_transaction(struct hfsmount *hfsmp) { @@ -2412,11 +3110,11 @@ hfs_start_transaction(struct hfsmount *hfsmp) } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) { - lck_rw_lock_shared(&hfsmp->hfs_global_lock); - OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); - unlock_on_err = 1; - } + if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) { + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } /* If a downgrade to read-only mount is in progress, no other * process than the downgrade process is allowed to modify @@ -2428,48 +3126,317 @@ hfs_start_transaction(struct hfsmount *hfsmp) goto out; } - if (hfsmp->jnl) { - ret = journal_start_transaction(hfsmp->jnl); - if (ret == 0) { - OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_global_lock_nesting); + if (hfsmp->jnl) { + ret = journal_start_transaction(hfsmp->jnl); + if (ret == 0) { + OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting); + } + } else { + ret = 0; } - } else { - ret = 0; - } out: - if (ret != 0 && unlock_on_err) { - lck_rw_unlock_shared(&hfsmp->hfs_global_lock); - OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); - } + if (ret != 0 && unlock_on_err) { + hfs_unlock_global (hfsmp); + OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); + } return ret; } -__private_extern__ int hfs_end_transaction(struct hfsmount *hfsmp) { int need_unlock=0, ret; - if ( hfsmp->jnl == NULL - || ( journal_owner(hfsmp->jnl) == current_thread() - && (OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_global_lock_nesting) == 1)) ) { - + if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread() + && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) { need_unlock = 1; } - if (hfsmp->jnl) { - ret = journal_end_transaction(hfsmp->jnl); - } else { - ret = 0; - } + if (hfsmp->jnl) { + ret = journal_end_transaction(hfsmp->jnl); + } else { + ret = 0; + } - if (need_unlock) { - OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); - lck_rw_unlock_shared(&hfsmp->hfs_global_lock); - hfs_sync_ejectable(hfsmp); - } + if (need_unlock) { + OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); + hfs_unlock_global (hfsmp); + hfs_sync_ejectable(hfsmp); + } return ret; } + + +/* + * Flush the contents of the journal to the disk. + * + * Input: + * wait_for_IO - + * If TRUE, wait to write in-memory journal to the disk + * consistently, and also wait to write all asynchronous + * metadata blocks to its corresponding locations + * consistently on the disk. This means that the journal + * is empty at this point and does not contain any + * transactions. This is overkill in normal scenarios + * but is useful whenever the metadata blocks are required + * to be consistent on-disk instead of just the journal + * being consistent; like before live verification + * and live volume resizing. + * + * If FALSE, only wait to write in-memory journal to the + * disk consistently. This means that the journal still + * contains uncommitted transactions and the file system + * metadata blocks in the journal transactions might be + * written asynchronously to the disk. But there is no + * guarantee that they are written to the disk before + * returning to the caller. Note that this option is + * sufficient for file system data integrity as it + * guarantees consistent journal content on the disk. + */ +int +hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO) +{ + int ret; + + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + ret = journal_flush(hfsmp->jnl, wait_for_IO); + } else { + ret = 0; + } + hfs_unlock_global (hfsmp); + + return ret; +} + + +/* + * hfs_erase_unused_nodes + * + * Check wheter a volume may suffer from unused Catalog B-tree nodes that + * are not zeroed (due to ). If so, just write + * zeroes to the unused nodes. + * + * How do we detect when a volume needs this repair? We can't always be + * certain. If a volume was created after a certain date, then it may have + * been created with the faulty newfs_hfs. Since newfs_hfs only created one + * clump, we can assume that if a Catalog B-tree is larger than its clump size, + * that means that the entire first clump must have been written to, which means + * there shouldn't be unused and unwritten nodes in that first clump, and this + * repair is not needed. + * + * We have defined a bit in the Volume Header's attributes to indicate when the + * unused nodes have been repaired. A newer newfs_hfs will set this bit. + * As will fsck_hfs when it repairs the unused nodes. + */ +int hfs_erase_unused_nodes(struct hfsmount *hfsmp) +{ + int result; + struct filefork *catalog; + int lockflags; + + if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask) + { + /* This volume has already been checked and repaired. */ + return 0; + } + + if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate)) + { + /* This volume is too old to have had the problem. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + catalog = hfsmp->hfs_catalog_cp->c_datafork; + if (catalog->ff_size > catalog->ff_clumpsize) + { + /* The entire first clump must have been in use at some point. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + /* + * If we get here, we need to zero out those unused nodes. + * + * We start a transaction and lock the catalog since we're going to be + * making on-disk changes. But note that BTZeroUnusedNodes doens't actually + * do its writing via the journal, because that would be too much I/O + * to fit in a transaction, and it's a pain to break it up into multiple + * transactions. (It behaves more like growing a B-tree would.) + */ + printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN); + result = hfs_start_transaction(hfsmp); + if (result) + goto done; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + result = BTZeroUnusedNodes(catalog); + vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes"); + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + if (result == 0) + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN); + +done: + return result; +} + + +extern time_t snapshot_timestamp; + +int +check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg) +{ + int tracked_error = 0, snapshot_error = 0; + + if (vp == NULL) { + return 0; + } + + /* Swap files are special; skip them */ + if (vnode_isswap(vp)) { + return 0; + } + + if (VTOC(vp)->c_bsdflags & UF_TRACKED) { + // the file has the tracked bit set, so send an event to the tracked-file handler + int error; + + // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp); + error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT); + if (error) { + if (error == EAGAIN) { + printf("hfs: tracked-file: timed out waiting for namespace handler...\n"); + + } else if (error == EINTR) { + // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n"); + tracked_error = EINTR; + } + } + } + + if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) { + // the change time is within this epoch + int error; + + error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg); + if (error == EDEADLK) { + snapshot_error = 0; + } else if (error) { + if (error == EAGAIN) { + printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n"); + } else if (error == EINTR) { + // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n"); + snapshot_error = EINTR; + } + } + } + + if (tracked_error) return tracked_error; + if (snapshot_error) return snapshot_error; + + return 0; +} + +int +check_for_dataless_file(struct vnode *vp, uint64_t op_type) +{ + int error; + + if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) { + // there's nothing to do, it's not dataless + return 0; + } + + /* Swap files are special; ignore them */ + if (vnode_isswap(vp)) { + return 0; + } + + // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp); + error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT); + if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) { + error = 0; + } else if (error) { + if (error == EAGAIN) { + printf("hfs: dataless: timed out waiting for namespace handler...\n"); + // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)? + return 0; + } else if (error == EINTR) { + // printf("hfs: dataless: got a signal while waiting for namespace handler...\n"); + return EINTR; + } + } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) { + // + // if we're here, the dataless bit is still set on the file + // which means it didn't get handled. we return an error + // but it's presently ignored by all callers of this function. + // + // XXXdbg - EDATANOTPRESENT is what we really need... + // + return EBADF; + } + + return error; +} + + +// +// NOTE: this function takes care of starting a transaction and +// acquiring the systemfile lock so that it can call +// cat_update(). +// +// NOTE: do NOT hold and cnode locks while calling this function +// to avoid deadlocks (because we take a lock on the root +// cnode) +// +int +hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid) +{ + struct vnode *rvp; + struct cnode *cp; + int error; + + error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel()); + if (error) { + return error; + } + + cp = VTOC(rvp); + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) { + return error; + } + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16)); + + int lockflags; + if (hfs_start_transaction(hfsmp) != 0) { + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + if (extinfo->document_id == 0) { + // initialize this to start at 3 (one greater than the root-dir id) + extinfo->document_id = 3; + } + + *docid = extinfo->document_id++; + + // mark the root cnode dirty + cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + + (void) hfs_unlock(cp); + + vnode_put(rvp); + rvp = NULL; + + return 0; +}