X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/de355530ae67247cbd0da700edb3a2a1dae884c2..7e41aa883dd258f888d0470250eead40a53ef1f5:/bsd/hfs/hfs_vfsutils.c?ds=sidebyside diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 247b36129..ade6d0ca0 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @(#)hfs_vfsutils.c 4.0 * @@ -32,11 +38,23 @@ #include #include #include -#include -#include +#include #include +#include #include #include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* for parsing boot-args */ +#include #include "hfs.h" #include "hfs_catalog.h" @@ -44,26 +62,24 @@ #include "hfs_mount.h" #include "hfs_endian.h" #include "hfs_cnode.h" +#include "hfs_fsctl.h" +#include "hfs_cprotect.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/HFSUnicodeWrappers.h" - -extern int count_lock_queue __P((void)); -extern uid_t console_user; - +/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */ +extern int hfs_resize_debug; static void ReleaseMetaFileVNode(struct vnode *vp); static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args); -u_int32_t GetLogicalBlockSize(struct vnode *vp); +static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); +static void hfs_thaw_locked(struct hfsmount *hfsmp); + +#define HFS_MOUNT_DEBUG 1 -/* BTree accessor routines */ -extern OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block); -extern OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount); -extern OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF); -extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options); //******************************************************************************* // Note: Finder information in the HFS/HFS+ metadata are considered opaque and @@ -76,15 +92,15 @@ extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, Relea // // //******************************************************************************* -char hfs_catname[] = "Catalog B-tree"; -char hfs_extname[] = "Extents B-tree"; -char hfs_vbmname[] = "Volume Bitmap"; - -char hfs_privdirname[] = - "\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80HFS+ Private Data"; +unsigned char hfs_catname[] = "Catalog B-tree"; +unsigned char hfs_extname[] = "Extents B-tree"; +unsigned char hfs_vbmname[] = "Volume Bitmap"; +unsigned char hfs_attrname[] = "Attribute B-tree"; +unsigned char hfs_startupname[] = "Startup File"; +#if CONFIG_HFS_STD OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, - struct proc *p) + __unused struct proc *p) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); int error; @@ -92,6 +108,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, struct cat_desc cndesc; struct cat_attr cnattr; struct cat_fork fork; + int newvnode_flags = 0; /* Block size must be a multiple of 512 */ if (SWAP_BE32(mdb->drAlBlkSiz) == 0 || @@ -99,16 +116,18 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, return (EINVAL); /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */ - if ((hfsmp->hfs_fs_ronly == 0) && ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) + if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && + ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) { return (EINVAL); - + } + hfsmp->hfs_flags |= HFS_STANDARD; /* * The MDB seems OK: transfer info from it into VCB * Note - the VCB starts out clear (all zeros) * */ vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord); - vcb->vcbCrDate = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate))); + vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate))); vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate); vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod))); vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb); @@ -116,6 +135,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt); vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr); vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks); + vcb->allocLimit = vcb->totalBlocks; vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz); vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz); vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt); @@ -127,7 +147,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt); vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt); bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo)); - if (!hfsmp->hfs_fs_ronly) + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) vcb->vcbWrCnt++; /* Compensate for write of MDB on last flush */ /* convert hfs encoded name into UTF-8 string */ @@ -136,18 +156,29 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, * When an HFS name cannot be encoded with the current * volume encoding we use MacRoman as a fallback. */ - if (error || (utf8chars == 0)) - (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN); + if (error || (utf8chars == 0)) { + error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN); + /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny the mount */ + if (error) { + goto MtVolErr; + } + } - hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size); + hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); vcb->vcbVBMIOSize = kHFSBlockSize; - VCB_LOCK_INIT(vcb); + /* Generate the partition-based AVH location */ + hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, + hfsmp->hfs_logical_block_count); + + /* HFS standard is read-only, so just stuff the FS location in here, too */ + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; bzero(&cndesc, sizeof(cndesc)); - cndesc.cd_parentcnid = kRootParID; + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_flags |= CD_ISMETA; bzero(&cnattr, sizeof(cnattr)); - cnattr.ca_nlink = 1; + cnattr.ca_linkcount = 1; cnattr.ca_mode = S_IFREG; bzero(&fork, sizeof(fork)); @@ -155,11 +186,12 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, * Set up Extents B-tree vnode */ cndesc.cd_nameptr = hfs_extname; - cndesc.cd_namelen = strlen(hfs_extname); + cndesc.cd_namelen = strlen((char *)hfs_extname); cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID; fork.cf_size = SWAP_BE32(mdb->drXTFlSize); fork.cf_blocks = fork.cf_size / vcb->blockSize; fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz); + fork.cf_vblocks = 0; fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock); fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount); fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock); @@ -168,27 +200,35 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount); cnattr.ca_blocks = fork.cf_blocks; - error = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &fork, - &vcb->extentsRefNum); - if (error) goto MtVolErr; - error = MacToVFSError(BTOpenPath(VTOF(vcb->extentsRefNum), - (KeyCompareProcPtr)CompareExtentKeys, - GetBTreeBlock, ReleaseBTreeBlock, - ExtendBTreeFile, SetBTreeBlockSize)); + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_extents_vp, &newvnode_flags); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error); + } + goto MtVolErr; + } + error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), + (KeyCompareProcPtr)CompareExtentKeys)); if (error) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } + hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); /* * Set up Catalog B-tree vnode... */ cndesc.cd_nameptr = hfs_catname; - cndesc.cd_namelen = strlen(hfs_catname); + cndesc.cd_namelen = strlen((char *)hfs_catname); cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID; fork.cf_size = SWAP_BE32(mdb->drCTFlSize); fork.cf_blocks = fork.cf_size / vcb->blockSize; fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz); + fork.cf_vblocks = 0; fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock); fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount); fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock); @@ -197,49 +237,142 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount); cnattr.ca_blocks = fork.cf_blocks; - error = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &fork, - &vcb->catalogRefNum); + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_catalog_vp, &newvnode_flags); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto MtVolErr; + } + error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), + (KeyCompareProcPtr)CompareCatalogKeys)); if (error) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } - error = MacToVFSError(BTOpenPath(VTOF(vcb->catalogRefNum), - (KeyCompareProcPtr)CompareCatalogKeys, - GetBTreeBlock, ReleaseBTreeBlock, - ExtendBTreeFile, SetBTreeBlockSize)); + hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp); + + /* + * Set up dummy Allocation file vnode (used only for locking bitmap) + */ + cndesc.cd_nameptr = hfs_vbmname; + cndesc.cd_namelen = strlen((char *)hfs_vbmname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID; + bzero(&fork, sizeof(fork)); + cnattr.ca_blocks = 0; + + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_allocation_vp, &newvnode_flags); if (error) { - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } + hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp); /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; + if (error == noErr) { + error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error); + } + } + + if (error == noErr) { + /* If the disk isn't write protected.. */ + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) { + MarkVCBDirty (vcb); // mark VCB dirty so it will be written + } + } + /* - * all done with b-trees so we can unlock now... + * all done with system files so we can unlock now... */ - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); - - if ( error == noErr ) - { - if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected - { - MarkVCBDirty( vcb ); // mark VCB dirty so it will be written - } - } - goto CmdDone; + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + + if (error == noErr) { + /* If successful, then we can just return once we've unlocked the cnodes */ + return error; + } //-- Release any resources allocated so far before exiting with an error: MtVolErr: - ReleaseMetaFileVNode(vcb->catalogRefNum); - ReleaseMetaFileVNode(vcb->extentsRefNum); + hfsUnmount(hfsmp, NULL); -CmdDone: return (error); } +#endif + +//******************************************************************************* +// +// Sanity check Volume Header Block: +// Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has +// not been endian-swapped and represents the on-disk contents of this sector. +// This routine will not change the endianness of vhp block. +// +//******************************************************************************* +OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp) +{ + u_int16_t signature; + u_int16_t hfs_version; + u_int32_t blockSize; + + signature = SWAP_BE16(vhp->signature); + hfs_version = SWAP_BE16(vhp->version); + + if (signature == kHFSPlusSigWord) { + if (hfs_version != kHFSPlusVersion) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version); + return (EINVAL); + } + } else if (signature == kHFSXSigWord) { + if (hfs_version != kHFSXVersion) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version); + return (EINVAL); + } + } else { + /* Removed printf for invalid HFS+ signature because it gives + * false error for UFS root volume + */ + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature); + } + return (EINVAL); + } + + /* Block size must be at least 512 and a power of 2 */ + blockSize = SWAP_BE32(vhp->blockSize); + if (blockSize < 512 || !powerof2(blockSize)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize); + } + return (EINVAL); + } + + if (blockSize < hfsmp->hfs_logical_block_size) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", + blockSize, hfsmp->hfs_logical_block_size); + } + return (EINVAL); + } + return 0; +} + //******************************************************************************* // Routine: hfs_MountHFSPlusVolume // @@ -247,55 +380,91 @@ CmdDone: //******************************************************************************* OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, - off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args) + off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred) { register ExtendedVCB *vcb; struct cat_desc cndesc; struct cat_attr cnattr; - UInt32 blockSize; + struct cat_fork cfork; + u_int32_t blockSize; + daddr64_t spare_sectors; + struct BTreeInfoRec btinfo; + u_int16_t signature; + u_int16_t hfs_version; + int newvnode_flags = 0; + int i; OSErr retval; + char converted_volname[256]; + size_t volname_length = 0; + size_t conv_volname_length = 0; - // XXXdbg - added the kHFSJSigWord case - if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord && - SWAP_BE16(vhp->signature) != kHFSJSigWord) || - SWAP_BE16(vhp->version) != kHFSPlusVersion) { - // XXXdbg - printf("hfs: mount: sig 0x%x and version 0x%x are not HFS or HFS+.\n", - vhp->signature, vhp->version); - return (EINVAL); + signature = SWAP_BE16(vhp->signature); + hfs_version = SWAP_BE16(vhp->version); + + retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp); + if (retval) + return retval; + + if (signature == kHFSXSigWord) { + /* The in-memory signature is always 'H+'. */ + signature = kHFSPlusSigWord; + hfsmp->hfs_flags |= HFS_X; } - /* Block size must be at least 512 and a power of 2 */ blockSize = SWAP_BE32(vhp->blockSize); - if (blockSize < 512 || (blockSize & (blockSize-1)) != 0) - return (EINVAL); - /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */ - if (hfsmp->hfs_fs_ronly == 0 && hfsmp->jnl == NULL && (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL && + (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n"); + } return (EINVAL); + } /* Make sure we can live with the physical block size. */ - if ((disksize & (hfsmp->hfs_phys_block_size - 1)) || - (embeddedOffset & (hfsmp->hfs_phys_block_size - 1)) || - (SWAP_BE32(vhp->blockSize) < hfsmp->hfs_phys_block_size)) { + if ((disksize & (hfsmp->hfs_logical_block_size - 1)) || + (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n", + hfsmp->hfs_logical_block_size); + } return (ENXIO); } + + /* + * If allocation block size is less than the physical block size, + * same data could be cached in two places and leads to corruption. + * + * HFS Plus reserves one allocation block for the Volume Header. + * If the physical size is larger, then when we read the volume header, + * we will also end up reading in the next allocation block(s). + * If those other allocation block(s) is/are modified, and then the volume + * header is modified, the write of the volume header's buffer will write + * out the old contents of the other allocation blocks. + * + * We assume that the physical block size is same as logical block size. + * The physical block size value is used to round down the offsets for + * reading and writing the primary and alternate volume headers. + * + * The same logic to ensure good hfs_physical_block_size is also in + * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and + * later are doing the I/Os using same block size. + */ + if (blockSize < hfsmp->hfs_physical_block_size) { + hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size; + hfsmp->hfs_log_per_phys = 1; + } + /* * The VolumeHeader seems OK: transfer info from it into VCB * Note - the VCB starts out clear (all zeros) */ vcb = HFSTOVCB(hfsmp); - vcb->vcbSigWord = SWAP_BE16(vhp->signature); - - // XXXdbg - remap this in case we've mounted a dirty journaled volume - if (vcb->vcbSigWord == kHFSJSigWord) { - vcb->vcbSigWord = kHFSPlusSigWord; - } - + vcb->vcbSigWord = signature; vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock); vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate)); - vcb->vcbAtrb = (UInt16)SWAP_BE32(vhp->attributes); + vcb->vcbAtrb = SWAP_BE32(vhp->attributes); vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize); vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID); vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate)); @@ -307,16 +476,15 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo)); vcb->vcbAlBlSt = 0; /* hfs+ allocation blocks start at first block of volume */ - if (!hfsmp->hfs_fs_ronly) + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) vcb->vcbWrCnt++; /* compensate for write of Volume Header on last flush */ - VCB_LOCK_INIT(vcb); - /* Now fill in the Extended VCB info */ vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation); vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks); + vcb->allocLimit = vcb->totalBlocks; vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks); - vcb->blockSize = SWAP_BE32(vhp->blockSize); + vcb->blockSize = blockSize; vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap); vcb->localCreateDate = SWAP_BE32(vhp->createDate); @@ -330,124 +498,348 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, * (currently set up from the wrapper MDB) using the * new blocksize value: */ - hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size); + hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO); + /* + * Validate and initialize the location of the alternate volume header. + * + * Note that there may be spare sectors beyond the end of the filesystem that still + * belong to our partition. + */ + + spare_sectors = hfsmp->hfs_logical_block_count - + (((daddr64_t)vcb->totalBlocks * blockSize) / + hfsmp->hfs_logical_block_size); + + /* + * Differentiate between "innocuous" spare sectors and the more unusual + * degenerate case: + * + * *** Innocuous spare sectors exist if: + * + * A) the number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and allocation block size) + * + * and + * + * B) the remainder is less than the size of a full allocation block's worth of bytes. + * + * This handles the normal case where there may be a few extra sectors, but the two + * are fundamentally in sync. + * + * *** Degenerate spare sectors exist if: + * A) The number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and block size). + * + * and + * + * B) the remainder is greater than a full allocation's block worth of bytes. + * In this case, a smaller file system exists in a larger partition. + * This can happen in various ways, including when volume is resized but the + * partition is yet to be resized. Under this condition, we have to assume that + * a partition management software may resize the partition to match + * the file system size in the future. Therefore we should update + * alternate volume header at two locations on the disk, + * a. 1024 bytes before end of the partition + * b. 1024 bytes before end of the file system + */ + + if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) { + /* + * Handle the degenerate case above. FS < partition size. + * AVH located at 1024 bytes from the end of the partition + */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + /* AVH located at 1024 bytes from the end of the filesystem */ + hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, + (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size)); + } + else { + /* Innocuous spare sectors; Partition & FS notion are in sync */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; + } + if (hfs_resize_debug) { + printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + bzero(&cndesc, sizeof(cndesc)); - cndesc.cd_parentcnid = kRootParID; + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_flags |= CD_ISMETA; bzero(&cnattr, sizeof(cnattr)); - cnattr.ca_nlink = 1; + cnattr.ca_linkcount = 1; cnattr.ca_mode = S_IFREG; /* * Set up Extents B-tree vnode */ cndesc.cd_nameptr = hfs_extname; - cndesc.cd_namelen = strlen(hfs_extname); + cndesc.cd_namelen = strlen((char *)hfs_extname); cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID; - SWAP_HFS_PLUS_FORK_DATA (&vhp->extentsFile); - cnattr.ca_blocks = vhp->extentsFile.totalBlocks; + cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize); + cfork.cf_new_size= 0; + cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->extentsFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->extentsFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_extents_vp, &newvnode_flags); + if (retval) + { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval); + } + goto ErrorExit; + } - retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, - (struct cat_fork *)&vhp->extentsFile, - &vcb->extentsRefNum); - SWAP_HFS_PLUS_FORK_DATA (&vhp->extentsFile); + hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); + hfs_unlock(hfsmp->hfs_extents_cp); - if (retval) goto ErrorExit; - retval = MacToVFSError(BTOpenPath(VTOF(vcb->extentsRefNum), - (KeyCompareProcPtr) CompareExtentKeysPlus, - GetBTreeBlock, ReleaseBTreeBlock, - ExtendBTreeFile, SetBTreeBlockSize)); - if (retval) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), + (KeyCompareProcPtr) CompareExtentKeysPlus)); + if (retval) + { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval); + } goto ErrorExit; } - /* * Set up Catalog B-tree vnode */ cndesc.cd_nameptr = hfs_catname; - cndesc.cd_namelen = strlen(hfs_catname); + cndesc.cd_namelen = strlen((char *)hfs_catname); cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID; - SWAP_HFS_PLUS_FORK_DATA(&vhp->catalogFile); - cnattr.ca_blocks = vhp->catalogFile.totalBlocks; - - retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, - (struct cat_fork *)&vhp->catalogFile, - &vcb->catalogRefNum); - SWAP_HFS_PLUS_FORK_DATA(&vhp->catalogFile); + cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->catalogFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->catalogFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_catalog_vp, &newvnode_flags); if (retval) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval); + } goto ErrorExit; } - retval = MacToVFSError(BTOpenPath(VTOF(vcb->catalogRefNum), - (KeyCompareProcPtr) CompareExtendedCatalogKeys, - GetBTreeBlock, ReleaseBTreeBlock, - ExtendBTreeFile, SetBTreeBlockSize)); + hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp); + hfs_unlock(hfsmp->hfs_catalog_cp); + + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), + (KeyCompareProcPtr) CompareExtendedCatalogKeys)); if (retval) { - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval); + } goto ErrorExit; } + if ((hfsmp->hfs_flags & HFS_X) && + BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) { + if (btinfo.keyCompareType == kHFSBinaryCompare) { + hfsmp->hfs_flags |= HFS_CASE_SENSITIVE; + /* Install a case-sensitive key compare */ + (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), + (KeyCompareProcPtr)cat_binarykeycompare); + } + } /* * Set up Allocation file vnode */ cndesc.cd_nameptr = hfs_vbmname; - cndesc.cd_namelen = strlen(hfs_vbmname); + cndesc.cd_namelen = strlen((char *)hfs_vbmname); cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID; - SWAP_HFS_PLUS_FORK_DATA(&vhp->allocationFile); - cnattr.ca_blocks = vhp->allocationFile.totalBlocks; - - retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, - (struct cat_fork *)&vhp->allocationFile, - &vcb->allocationsRefNum); - SWAP_HFS_PLUS_FORK_DATA(&vhp->allocationFile); + cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->allocationFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->allocationFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_allocation_vp, &newvnode_flags); if (retval) { - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval); + } goto ErrorExit; } + hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp); + hfs_unlock(hfsmp->hfs_allocation_cp); + + /* + * Set up Attribute B-tree vnode + */ + if (vhp->attributesFile.totalBlocks != 0) { + cndesc.cd_nameptr = hfs_attrname; + cndesc.cd_namelen = strlen((char *)hfs_attrname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID; + + cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->attributesFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->attributesFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_attribute_vp, &newvnode_flags); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval); + } + goto ErrorExit; + } + hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp); + hfs_unlock(hfsmp->hfs_attribute_cp); + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp), + (KeyCompareProcPtr) hfs_attrkeycompare)); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval); + } + goto ErrorExit; + } + + /* Initialize vnode for virtual attribute data file that spans the + * entire file system space for performing I/O to attribute btree + * We hold iocount on the attrdata vnode for the entire duration + * of mount (similar to btree vnodes) + */ + retval = init_attrdata_vnode(hfsmp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval); + } + goto ErrorExit; + } + } - /* Pick up volume name and create date */ - retval = cat_idlookup(hfsmp, kHFSRootFolderID, &cndesc, &cnattr, NULL); + /* + * Set up Startup file vnode + */ + if (vhp->startupFile.totalBlocks != 0) { + cndesc.cd_nameptr = hfs_startupname; + cndesc.cd_namelen = strlen((char *)hfs_startupname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID; + + cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->startupFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->startupFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_startup_vp, &newvnode_flags); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval); + } + goto ErrorExit; + } + hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp); + hfs_unlock(hfsmp->hfs_startup_cp); + } + + /* + * Pick up volume name and create date + * + * Acquiring the volume name should not manipulate the bitmap, only the catalog + * btree and possibly the extents overflow b-tree. + */ + retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL); if (retval) { - VOP_UNLOCK(vcb->allocationsRefNum, 0, p); - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval); + } goto ErrorExit; } - vcb->vcbCrDate = cnattr.ca_itime; + vcb->hfs_itime = cnattr.ca_itime; vcb->volumeNameEncodingHint = cndesc.cd_encoding; bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen)); + volname_length = strlen ((const char*)vcb->vcbVN); cat_releasedesc(&cndesc); + + /* Send the volume name down to CoreStorage if necessary */ + retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED); + if (retval == 0) { + (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); + } + + /* reset retval == 0. we don't care about errors in volname conversion */ + retval = 0; - /* mark the volume dirty (clear clean unmount bit) */ - vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; - if (hfsmp->jnl && hfsmp->hfs_fs_ronly == 0) { - hfs_flushvolumeheader(hfsmp, TRUE, TRUE); + + /* + * We now always initiate a full bitmap scan even if the volume is read-only because this is + * our only shot to do I/Os of dramaticallly different sizes than what the buffer cache ordinarily + * expects. TRIMs will not be delivered to the underlying media if the volume is not + * read-write though. + */ + thread_t allocator_scanner; + hfsmp->scan_var = 0; + + /* Take the HFS mount mutex and wait on scan_var */ + hfs_lock_mount (hfsmp); + + kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner); + /* Wait until it registers that it's got the appropriate locks (or that it is finished) */ + while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) { + msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0); } - /* - * all done with metadata files so we can unlock now... - */ - VOP_UNLOCK(vcb->allocationsRefNum, 0, p); - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock_mount(hfsmp); - /* setup private/hidden directory for unlinked files */ - hfsmp->hfs_private_metadata_dir = FindMetaDataDirectory(vcb); - if (hfsmp->jnl && (hfsmp->hfs_fs_ronly == 0)) - hfs_remove_orphans(hfsmp); + thread_deallocate (allocator_scanner); - if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected - { - MarkVCBDirty( vcb ); // mark VCB dirty so it will be written + /* mark the volume dirty (clear clean unmount bit) */ + vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; + if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); } + /* kHFSHasFolderCount is only supported/updated on HFSX volumes */ + if ((hfsmp->hfs_flags & HFS_X) != 0) { + hfsmp->hfs_flags |= HFS_FOLDERCOUNT; + } // // Check if we need to do late journal initialization. This only @@ -461,14 +853,68 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, retval = hfs_late_journal_init(hfsmp, vhp, args); if (retval != 0) { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval); + } + retval = EINVAL; + goto ErrorExit; + } + hfsmp->jnl = NULL; + + // if the journal failed to open, then set the lastMountedVersion + // to be "FSK!" which fsck_hfs will see and force the fsck instead + // of just bailing out because the volume is journaled. + if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) { + HFSPlusVolumeHeader *jvhp; + daddr64_t mdb_offset; + struct buf *bp = NULL; + + hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; + + mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize)); + + bp = NULL; + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, cred, &bp); + if (retval == 0) { + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); + + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); + } else { + buf_brelse(bp); + } + bp = NULL; + } else if (bp) { + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; + } + } + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval); + } + retval = EINVAL; goto ErrorExit; } else if (hfsmp->jnl) { - hfsmp->hfs_mp->mnt_flag |= MNT_JOURNALED; + vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); } - } else if (hfsmp->jnl) { + } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) { struct cat_attr jinfo_attr, jnl_attr; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; + } + // if we're here we need to fill in the fileid's for the // journal and journal_info_block. hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL); @@ -477,156 +923,723 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n"); printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid); } - } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + vcb->vcbAtrb |= kHFSVolumeJournaledMask; + } - return (0); + if (hfsmp->jnl == NULL) { + vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + } + } -ErrorExit: + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected + { + MarkVCBDirty( vcb ); // mark VCB dirty so it will be written + } + + if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) { + hfs_pin_fs_metadata(hfsmp); + } /* - * A fatal error occured and the volume cannot be mounted - * release any resources that we aquired... + * Distinguish 3 potential cases involving content protection: + * 1. mount point bit set; vcbAtrb does not support it. Fail. + * 2. mount point bit set; vcbattrb supports it. we're good. + * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good. */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Does the mount point support it ? */ + if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) { + /* Case 1 above */ + retval = EINVAL; + goto ErrorExit; + } + } + else { + /* not requested in the mount point. Is it in FS? */ + if (vcb->vcbAtrb & kHFSContentProtectionMask) { + /* Case 3 above */ + vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT); + } + } - InvalidateCatalogCache(vcb); - ReleaseMetaFileVNode(vcb->allocationsRefNum); - ReleaseMetaFileVNode(vcb->catalogRefNum); - ReleaseMetaFileVNode(vcb->extentsRefNum); - - return (retval); -} + /* At this point, if the mount point flag is set, we can enable it. */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Cases 2+3 above */ +#if CONFIG_PROTECT + /* Get the EAs as needed. */ + int cperr = 0; + struct cp_root_xattr *xattr = NULL; + MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); + + /* go get the EA to get the version information */ + cperr = cp_getrootxattr (hfsmp, xattr); + /* + * If there was no EA there, then write one out. + * Assuming EA is not present on the root means + * this is an erase install or a very old FS + */ + if (cperr == 0) { + /* Have to run a valid CP version. */ + if (!cp_is_supported_version(xattr->major_version)) { + cperr = EINVAL; + } + } + else if (cperr == ENOATTR) { + printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS); + bzero(xattr, sizeof(struct cp_root_xattr)); + xattr->major_version = CP_CURRENT_VERS; + xattr->minor_version = CP_MINOR_VERS; + cperr = cp_setrootxattr (hfsmp, xattr); + } -/* - * ReleaseMetaFileVNode - * - * vp L - - - */ -static void ReleaseMetaFileVNode(struct vnode *vp) -{ - struct filefork *fp; + if (cperr) { + FREE(xattr, M_TEMP); + retval = EPERM; + goto ErrorExit; + } - if (vp && (fp = VTOF(vp))) { - if (fp->fcbBTCBPtr != NULL) - (void) BTClosePath(fp); + /* If we got here, then the CP version is valid. Set it in the mount point */ + hfsmp->hfs_running_cp_major_vers = xattr->major_version; + printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version); + hfsmp->cproot_flags = xattr->flags; + hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0; - /* release the node even if BTClosePath fails */ - vrele(vp); - vgone(vp); - } -} + FREE(xattr, M_TEMP); + /* + * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree. + * Ensure that the boot-arg's value is valid for FILES (not directories), + * since only files are actually protected for now. + */ -/************************************************************* -* -* Unmounts a hfs volume. -* At this point vflush() has been called (to dump all non-metadata files) -* -*************************************************************/ + PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); -short hfsUnmount( register struct hfsmount *hfsmp, struct proc *p) -{ - ExtendedVCB *vcb = HFSTOVCB(hfsmp); - int retval = E_NONE; + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + } - InvalidateCatalogCache( vcb ); +#if HFS_TMPDBG +#if !SECURE_KERNEL + PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose)); +#endif +#endif - if (vcb->vcbSigWord == kHFSPlusSigWord) - ReleaseMetaFileVNode(vcb->allocationsRefNum); + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + hfsmp->default_cp_class = PROTECTION_CLASS_C; + } - ReleaseMetaFileVNode(vcb->catalogRefNum); - ReleaseMetaFileVNode(vcb->extentsRefNum); +#else + /* If CONFIG_PROTECT not built, ignore CP */ + vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT); +#endif + } - return (retval); -} + /* + * Establish a metadata allocation zone. + */ + hfs_metadatazone_init(hfsmp, false); + /* + * Make any metadata zone adjustments. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* Keep the roving allocator out of the metadata zone. */ + if (vcb->nextAllocation >= hfsmp->hfs_metazone_start && + vcb->nextAllocation <= hfsmp->hfs_metazone_end) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + } + } else { + if (vcb->nextAllocation <= 1) { + vcb->nextAllocation = hfsmp->hfs_min_alloc_start; + } + } + vcb->sparseAllocation = hfsmp->hfs_min_alloc_start; -/* - * Some 3rd party kexts link against hfs_getcatalog so keep a stub for now. - */ -short -hfs_getcatalog(void *p1, u_long p2, void *p3, short p4, void *p5) -{ - return ENOENT; -} + /* Setup private/hidden directories for hardlinks. */ + hfs_privatedir_init(hfsmp, FILE_HARDLINKS); + hfs_privatedir_init(hfsmp, DIR_HARDLINKS); + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + hfs_remove_orphans(hfsmp); -int overflow_extents(struct filefork *fp) -{ - u_long blocks; + /* See if we need to erase unused Catalog nodes due to . */ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + { + retval = hfs_erase_unused_nodes(hfsmp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN); + } - if (VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) { - if (fp->ff_extents[7].blockCount == 0) - return (0); + goto ErrorExit; + } + } + + /* + * Allow hot file clustering if conditions allow. + */ + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) && + ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) { + // + // Wait until the bitmap scan completes before we initializes the + // hotfile area so that we do not run into any issues with the + // bitmap being read while hotfiles is initializing itself. On + // some older/slower machines, without this interlock, the bitmap + // would sometimes get corrupted at boot time. + // + hfs_lock_mount(hfsmp); + while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) { + (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0); + } + hfs_unlock_mount(hfsmp); + + /* + * Note: at this point we are not allowed to fail the + * mount operation because the HotFile init code + * in hfs_recording_init() will lookup vnodes with + * VNOP_LOOKUP() which hangs vnodes off the mount + * (and if we were to fail, VFS is not prepared to + * clean that up at this point. Since HotFiles are + * optional, this is not a big deal. + */ + (void) hfs_recording_init(hfsmp); + } - blocks = fp->ff_extents[0].blockCount + - fp->ff_extents[1].blockCount + - fp->ff_extents[2].blockCount + - fp->ff_extents[3].blockCount + - fp->ff_extents[4].blockCount + - fp->ff_extents[5].blockCount + - fp->ff_extents[6].blockCount + - fp->ff_extents[7].blockCount; - } else { + /* Force ACLs on HFS+ file systems. */ + vfs_setextendedsecurity(HFSTOVFS(hfsmp)); + + /* Enable extent-based extended attributes by default */ + hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; + + return (0); + +ErrorExit: + /* + * A fatal error occurred and the volume cannot be mounted, so + * release any resources that we acquired... + */ + hfsUnmount(hfsmp, NULL); + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: encountered error (%d)\n", retval); + } + return (retval); +} + +static int +_pin_metafile(struct hfsmount *hfsmp, vnode_t vp) +{ + int err; + + err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (err == 0) { + err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL, vfs_context_kernel()); + hfs_unlock(VTOC(vp)); + } + + return err; +} + +void +hfs_pin_fs_metadata(struct hfsmount *hfsmp) +{ + ExtendedVCB *vcb; + int err; + + vcb = HFSTOVCB(hfsmp); + + err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp); + if (err != 0) { + printf("hfs: failed to pin extents overflow file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp); + if (err != 0) { + printf("hfs: failed to pin catalog file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp); + if (err != 0) { + printf("hfs: failed to pin bitmap file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp); + if (err != 0) { + printf("hfs: failed to pin extended attr file %d\n", err); + } + + hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1, vfs_context_kernel()); + hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1, vfs_context_kernel()); + + if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) { + // and hey, if we've got a journal, let's pin that too! + hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize), vfs_context_kernel()); + } +} + +/* + * ReleaseMetaFileVNode + * + * vp L - - + */ +static void ReleaseMetaFileVNode(struct vnode *vp) +{ + struct filefork *fp; + + if (vp && (fp = VTOF(vp))) { + if (fp->fcbBTCBPtr != NULL) { + (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + (void) BTClosePath(fp); + hfs_unlock(VTOC(vp)); + } + + /* release the node even if BTClosePath fails */ + vnode_recycle(vp); + vnode_put(vp); + } +} + + +/************************************************************* +* +* Unmounts a hfs volume. +* At this point vflush() has been called (to dump all non-metadata files) +* +*************************************************************/ + +int +hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) +{ + /* Get rid of our attribute data vnode (if any). This is done + * after the vflush() during mount, so we don't need to worry + * about any locks. + */ + if (hfsmp->hfs_attrdata_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp); + hfsmp->hfs_attrdata_vp = NULLVP; + } + + if (hfsmp->hfs_startup_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_startup_vp); + hfsmp->hfs_startup_cp = NULL; + hfsmp->hfs_startup_vp = NULL; + } + + if (hfsmp->hfs_attribute_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp); + hfsmp->hfs_attribute_cp = NULL; + hfsmp->hfs_attribute_vp = NULL; + } + + if (hfsmp->hfs_catalog_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); + hfsmp->hfs_catalog_cp = NULL; + hfsmp->hfs_catalog_vp = NULL; + } + + if (hfsmp->hfs_extents_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); + hfsmp->hfs_extents_cp = NULL; + hfsmp->hfs_extents_vp = NULL; + } + + if (hfsmp->hfs_allocation_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); + hfsmp->hfs_allocation_cp = NULL; + hfsmp->hfs_allocation_vp = NULL; + } + + return (0); +} + + +/* + * Test if fork has overflow extents. + * + * Returns: + * non-zero - overflow extents exist + * zero - overflow extents do not exist + */ +__private_extern__ +bool overflow_extents(struct filefork *fp) +{ + u_int32_t blocks; + + // + // If the vnode pointer is NULL then we're being called + // from hfs_remove_orphans() with a faked-up filefork + // and therefore it has to be an HFS+ volume. Otherwise + // we check through the volume header to see what type + // of volume we're on. + // + +#if CONFIG_HFS_STD + if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) { if (fp->ff_extents[2].blockCount == 0) return false; - + blocks = fp->ff_extents[0].blockCount + - fp->ff_extents[1].blockCount + - fp->ff_extents[2].blockCount; - } + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount; + + return fp->ff_blocks > blocks; + } +#endif + + if (fp->ff_extents[7].blockCount == 0) + return false; + + blocks = fp->ff_extents[0].blockCount + + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount + + fp->ff_extents[3].blockCount + + fp->ff_extents[4].blockCount + + fp->ff_extents[5].blockCount + + fp->ff_extents[6].blockCount + + fp->ff_extents[7].blockCount; + + return fp->ff_blocks > blocks; +} + +static __attribute__((pure)) +boolean_t hfs_is_frozen(struct hfsmount *hfsmp) +{ + return (hfsmp->hfs_freeze_state == HFS_FROZEN + || (hfsmp->hfs_freeze_state == HFS_FREEZING + && current_thread() != hfsmp->hfs_freezing_thread)); +} + +/* + * Lock the HFS global journal lock + */ +int +hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) +{ + thread_t thread = current_thread(); + + if (hfsmp->hfs_global_lockowner == thread) { + panic ("hfs_lock_global: locking against myself!"); + } + + /* + * This check isn't really necessary but this stops us taking + * the mount lock in most cases. The essential check is below. + */ + if (hfs_is_frozen(hfsmp)) { + /* + * Unfortunately, there is no easy way of getting a notification + * for when a process is exiting and it's possible for the exiting + * process to get blocked somewhere else. To catch this, we + * periodically monitor the frozen process here and thaw if + * we spot that it's exiting. + */ +frozen: + hfs_lock_mount(hfsmp); + + struct timespec ts = { 0, 500 * NSEC_PER_MSEC }; + + while (hfs_is_frozen(hfsmp)) { + if (hfsmp->hfs_freeze_state == HFS_FROZEN + && proc_exiting(hfsmp->hfs_freezing_proc)) { + hfs_thaw_locked(hfsmp); + break; + } + + msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT, "hfs_lock_global (frozen)", &ts); + } + hfs_unlock_mount(hfsmp); + } - return (fp->ff_blocks > blocks); + /* HFS_SHARED_LOCK */ + if (locktype == HFS_SHARED_LOCK) { + lck_rw_lock_shared (&hfsmp->hfs_global_lock); + hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER; + } + /* HFS_EXCLUSIVE_LOCK */ + else { + lck_rw_lock_exclusive (&hfsmp->hfs_global_lock); + hfsmp->hfs_global_lockowner = thread; + } + + /* + * We have to check if we're frozen again because of the time + * between when we checked and when we took the global lock. + */ + if (hfs_is_frozen(hfsmp)) { + hfs_unlock_global(hfsmp); + goto frozen; + } + + return 0; +} + + +/* + * Unlock the HFS global journal lock + */ +void +hfs_unlock_global (struct hfsmount *hfsmp) +{ + thread_t thread = current_thread(); + + /* HFS_LOCK_EXCLUSIVE */ + if (hfsmp->hfs_global_lockowner == thread) { + hfsmp->hfs_global_lockowner = NULL; + lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock); + } + /* HFS_LOCK_SHARED */ + else { + lck_rw_unlock_shared (&hfsmp->hfs_global_lock); + } +} + +/* + * Lock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_lock_mount (struct hfsmount *hfsmp) { + lck_mtx_lock (&(hfsmp->hfs_mutex)); } +/* + * Unlock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_unlock_mount (struct hfsmount *hfsmp) { + lck_mtx_unlock (&(hfsmp->hfs_mutex)); +} -/* __private_extern__ */ +/* + * Lock HFS system file(s). + * + * This function accepts a @flags parameter which indicates which + * system file locks are required. The value it returns should be + * used in a subsequent call to hfs_systemfile_unlock. The caller + * should treat this value as opaque; it may or may not have a + * relation to the @flags field that is passed in. The *only* + * guarantee that we make is that a value of zero means that no locks + * were taken and that there is no need to call hfs_systemfile_unlock + * (although it is harmless to do so). Recursion is supported but + * care must still be taken to ensure correct lock ordering. Note + * that requests for certain locks may cause other locks to also be + * taken, including locks that are not possible to ask for via the + * @flags parameter. + */ int -hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct proc *p) +hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype) { - ExtendedVCB *vcb; - struct vnode *vp = NULL; - int numOfLockedBuffs; - int retval = 0; + /* + * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file + */ + if (flags & SFL_CATALOG) { + if (hfsmp->hfs_catalog_cp + && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) { +#ifdef HFS_CHECK_LOCK_ORDER + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); + } + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)"); + } + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)"); + } +#endif /* HFS_CHECK_LOCK_ORDER */ - vcb = HFSTOVCB(hfsmp); + (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the catalog file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_catalog_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_CATALOG; + } + } - switch (fileID) { - case kHFSExtentsFileID: - vp = vcb->extentsRefNum; - break; + if (flags & SFL_ATTRIBUTE) { + if (hfsmp->hfs_attribute_cp + && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) { +#ifdef HFS_CHECK_LOCK_ORDER + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)"); + } +#endif /* HFS_CHECK_LOCK_ORDER */ + + (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the attribute file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_attribute_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_ATTRIBUTE; + } + } - case kHFSCatalogFileID: - vp = vcb->catalogRefNum; - break; + if (flags & SFL_STARTUP) { + if (hfsmp->hfs_startup_cp + && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) { +#ifdef HFS_CHECK_LOCK_ORDER + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Startup)"); + } +#endif /* HFS_CHECK_LOCK_ORDER */ - case kHFSAllocationFileID: - /* bitmap is covered by Extents B-tree locking */ - /* FALL THROUGH */ - default: - panic("hfs_lockmetafile: invalid fileID"); + (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the startup file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_startup_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_STARTUP; + } } - /* Release, if necesary any locked buffer caches */ - if ((flags & LK_TYPE_MASK) == LK_RELEASE) { - struct timeval tv = time; - u_int32_t lastfsync = tv.tv_sec; - - (void) BTGetLastSync((FCB*)VTOF(vp), &lastfsync); - - numOfLockedBuffs = count_lock_queue(); - if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || ((numOfLockedBuffs>1) && ((tv.tv_sec - lastfsync) > kMaxSecsForFsync))) { - hfs_btsync(vp, HFS_SYNCTRANS); + /* + * To prevent locks being taken in the wrong order, the extent lock + * gets a bitmap lock as well. + */ + if (flags & (SFL_BITMAP | SFL_EXTENTS)) { + if (hfsmp->hfs_allocation_cp) { + (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + /* + * The bitmap lock is also grabbed when only extent lock + * was requested. Set the bitmap lock bit in the lock + * flags which callers will use during unlock. + */ + flags |= SFL_BITMAP; + } else { + flags &= ~SFL_BITMAP; } - } else { - flags |= LK_RETRY; } - - retval = lockmgr(&VTOC(vp)->c_lock, flags, &vp->v_interlock, p); - return (retval); + if (flags & SFL_EXTENTS) { + /* + * Since the extents btree lock is recursive we always + * need exclusive access. + */ + if (hfsmp->hfs_extents_cp) { + (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * because we may need this lock on the pageout path (if a swapfile allocation + * spills into the extents overflow tree), we will grant the holder of this + * lock the privilege of dipping into the reserve free pool in order to prevent + * a deadlock from occurring if we need those pageouts to complete before we + * will make any new pages available on the free list... the deadlock can occur + * if this thread needs to allocate memory while this lock is held + */ + if (set_vm_privilege(TRUE) == FALSE) { + /* + * indicate that we need to drop vm_privilege + * when we unlock + */ + flags |= SFL_VM_PRIV; + } + } + } else { + flags &= ~SFL_EXTENTS; + } + } + + return (flags); +} + +/* + * unlock HFS system file(s). + */ +void +hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) +{ + if (!flags) + return; + + struct timeval tv; + u_int32_t lastfsync; + int numOfLockedBuffs; + + if (hfsmp->jnl == NULL) { + microuptime(&tv); + lastfsync = tv.tv_sec; + } + if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) { + hfs_unlock(hfsmp->hfs_startup_cp); + } + if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(hfsmp->hfs_attribute_cp); + } + if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(hfsmp->hfs_catalog_cp); + } + if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) { + hfs_unlock(hfsmp->hfs_allocation_cp); + } + if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(hfsmp->hfs_extents_cp); + + if (flags & SFL_VM_PRIV) { + /* + * revoke the vm_privilege we granted this thread + * now that we have unlocked the overflow extents + */ + set_vm_privilege(FALSE); + } + } } + /* * RequireFileLock * @@ -636,37 +1649,34 @@ hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct p #if HFS_DIAGNOSTIC void RequireFileLock(FileReference vp, int shareable) { - struct lock__bsd__ *lkp; - int locked = false; - pid_t pid; - void * self; - - pid = current_proc()->p_pid; - self = (void *) current_thread(); - lkp = &VTOC(vp)->c_lock; + int locked; - simple_lock(&lkp->lk_interlock); + /* The extents btree and allocation bitmap are always exclusive. */ + if (VTOC(vp)->c_fileid == kHFSExtentsFileID || + VTOC(vp)->c_fileid == kHFSAllocationFileID) { + shareable = 0; + } - if (shareable && (lkp->lk_sharecount > 0) && (lkp->lk_lockholder == LK_NOPROC)) - locked = true; - else if ((lkp->lk_exclusivecount > 0) && (lkp->lk_lockholder == pid) && (lkp->lk_lockthread == self)) - locked = true; - - simple_unlock(&lkp->lk_interlock); + locked = VTOC(vp)->c_lockowner == current_thread(); - if (!locked) { + if (!locked && !shareable) { switch (VTOC(vp)->c_fileid) { - case 3: - DEBUG_BREAK_MSG((" #\n # RequireFileLock: extent btree vnode not locked! v: 0x%08X\n #\n", (u_int)vp)); - break; - - case 4: - DEBUG_BREAK_MSG((" #\n # RequireFileLock: catalog btree vnode not locked! v: 0x%08X\n #\n", (u_int)vp)); - break; - - default: - DEBUG_BREAK_MSG((" #\n # RequireFileLock: file (%d) not locked! v: 0x%08X\n #\n", VTOC(vp)->c_fileid, (u_int)vp)); - break; + case kHFSExtentsFileID: + panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSCatalogFileID: + panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSAllocationFileID: + /* The allocation file can hide behind the jornal lock. */ + if (VTOHFS(vp)->jnl == NULL) + panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSStartupFileID: + panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp); + case kHFSAttributesFileID: + panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; } } } @@ -677,27 +1687,24 @@ void RequireFileLock(FileReference vp, int shareable) * There are three ways to qualify for ownership rights on an object: * * 1. (a) Your UID matches the cnode's UID. - * (b) The object in question is owned by "unknown" and - * your UID matches the console user's UID. + * (b) The object in question is owned by "unknown" * 2. (a) Permissions on the filesystem are being ignored and * your UID matches the replacement UID. * (b) Permissions on the filesystem are being ignored and - * the replacement UID is "unknown" and - * your UID matches the console user UID. + * the replacement UID is "unknown". * 3. You are root. * */ int -hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, struct ucred *cred, - struct proc *p, int invokesuperuserstatus) +hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, + __unused struct proc *p, int invokesuperuserstatus) { - if ((cred->cr_uid == cnode_uid) || /* [1a] */ - ((cnode_uid == UNKNOWNUID) && (cred->cr_uid == console_user)) || /* [1b] */ - ((HFSTOVFS(hfsmp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) && /* [2] */ - ((cred->cr_uid == hfsmp->hfs_uid) || /* [2a] */ - ((hfsmp->hfs_uid == UNKNOWNUID) && /* [2b] */ - (cred->cr_uid == console_user)))) || - (invokesuperuserstatus && (suser(cred, &p->p_acflag) == 0))) { /* [3] */ + if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */ + (cnode_uid == UNKNOWNUID) || /* [1b] */ + ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */ + ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */ + (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */ + (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */ return (0); } else { return (EPERM); @@ -705,15 +1712,15 @@ hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, struct ucred *cred, } -unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, - unsigned long blockSizeLimit, - unsigned long baseMultiple) { +u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, + u_int32_t blockSizeLimit, + u_int32_t baseMultiple) { /* Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the specified limit but still an even multiple of the baseMultiple. */ int baseBlockCount, blockCount; - unsigned long trialBlockSize; + u_int32_t trialBlockSize; if (allocationBlockSize % baseMultiple != 0) { /* @@ -748,21 +1755,13 @@ unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, } -/* - * To make the HFS Plus filesystem follow UFS unlink semantics, a remove - * of an active vnode is translated to a move/rename so the file appears - * deleted. The destination folder for these move/renames is setup here - * and a reference to it is place in hfsmp->hfs_private_metadata_dir. - */ -u_long -FindMetaDataDirectory(ExtendedVCB *vcb) +u_int32_t +GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, + struct cat_attr *fattr, struct cat_fork *forkinfo) { struct hfsmount * hfsmp; - struct vnode * dvp = NULL; - struct cnode * dcp = NULL; - struct FndrDirInfo * fndrinfo; - struct cat_desc out_desc = {0}; - struct timeval tv; + struct cat_desc jdesc; + int lockflags; int error; if (vcb->vcbSigWord != kHFSPlusSigWord) @@ -770,142 +1769,31 @@ FindMetaDataDirectory(ExtendedVCB *vcb) hfsmp = VCBTOHFS(vcb); - if (hfsmp->hfs_privdir_desc.cd_parentcnid == 0) { - hfsmp->hfs_privdir_desc.cd_parentcnid = kRootDirID; - hfsmp->hfs_privdir_desc.cd_nameptr = hfs_privdirname; - hfsmp->hfs_privdir_desc.cd_namelen = strlen(hfs_privdirname); - hfsmp->hfs_privdir_desc.cd_flags = CD_ISDIR; - } - - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, current_proc()); - if (error) - return (0); + memset(&jdesc, 0, sizeof(struct cat_desc)); + jdesc.cd_parentcnid = kRootDirID; + jdesc.cd_nameptr = (const u_int8_t *)name; + jdesc.cd_namelen = strlen(name); - error = cat_lookup(hfsmp, &hfsmp->hfs_privdir_desc, 0, NULL, - &hfsmp->hfs_privdir_attr, NULL); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, current_proc()); - hfsmp->hfs_metadata_createdate = hfsmp->hfs_privdir_attr.ca_itime; - return (hfsmp->hfs_privdir_attr.ca_fileid); - } else if (hfsmp->hfs_fs_ronly) { - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, current_proc()); + return (fattr->ca_fileid); + } else if (hfsmp->hfs_flags & HFS_READ_ONLY) { return (0); } - - /* Setup the default attributes */ - bzero(&hfsmp->hfs_privdir_attr, sizeof(struct cat_attr)); - hfsmp->hfs_privdir_attr.ca_mode = S_IFDIR; - hfsmp->hfs_privdir_attr.ca_flags = SF_IMMUTABLE; - hfsmp->hfs_privdir_attr.ca_nlink = 2; - hfsmp->hfs_privdir_attr.ca_itime = vcb->vcbCrDate; - hfsmp->hfs_privdir_attr.ca_mtime = time.tv_sec; - - /* hidden and off the desktop view */ - fndrinfo = (struct FndrDirInfo *)&hfsmp->hfs_privdir_attr.ca_finderinfo; - fndrinfo->frLocation.v = SWAP_BE16 (22460); - fndrinfo->frLocation.h = SWAP_BE16 (22460); - fndrinfo->frFlags |= SWAP_BE16 (kIsInvisible + kNameLocked); - - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - hfs_global_shared_lock_release(hfsmp); - return (0); - } - } - - error = cat_create(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, &out_desc); - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, current_proc()); - if (error) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - - return (0); - } - - hfsmp->hfs_privdir_desc.cd_hint = out_desc.cd_hint; - hfsmp->hfs_privdir_desc.cd_cnid = out_desc.cd_cnid; - hfsmp->hfs_privdir_attr.ca_fileid = out_desc.cd_cnid; - hfsmp->hfs_metadata_createdate = vcb->vcbCrDate; - - if (VFS_ROOT(HFSTOVFS(hfsmp), &dvp) == 0) { - dcp = VTOC(dvp); - dcp->c_childhint = out_desc.cd_hint; - dcp->c_nlink++; - dcp->c_entries++; - dcp->c_flag |= C_CHANGE | C_UPDATE; - tv = time; - (void) VOP_UPDATE(dvp, &tv, &tv, 0); - vput(dvp); - } - hfs_volupdate(hfsmp, VOL_MKDIR, 1); - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - - cat_releasedesc(&out_desc); - - return (out_desc.cd_cnid); -} - -__private_extern__ -u_long -GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name, - struct cat_attr *fattr, struct cat_fork *forkinfo) -{ - struct hfsmount * hfsmp; - struct vnode * dvp = NULL; - struct cnode * dcp = NULL; - struct FndrDirInfo * fndrinfo; - struct cat_desc jdesc; - struct timeval tv; - int error; - - if (vcb->vcbSigWord != kHFSPlusSigWord) - return (0); - - hfsmp = VCBTOHFS(vcb); - - memset(&jdesc, 0, sizeof(struct cat_desc)); - jdesc.cd_parentcnid = kRootDirID; - jdesc.cd_nameptr = name; - jdesc.cd_namelen = strlen(name); - - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, current_proc()); - if (error) - return (0); - - error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo); - - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, current_proc()); - - if (error == 0) { - return (fattr->ca_fileid); - } else if (hfsmp->hfs_fs_ronly) { - return (0); - } -} + return (0); /* XXX what callers expect on an error */ +} /* - * On Journaled HFS, there can be orphaned files. These - * are files that were unlinked while busy. If the volume - * was not cleanly unmounted then some of these files may + * On HFS Plus Volumes, there can be orphaned files or directories + * These are files or directories that were unlinked while busy. + * If the volume was not cleanly unmounted then some of these may * have persisted and need to be removed. */ -__private_extern__ void hfs_remove_orphans(struct hfsmount * hfsmp) { @@ -913,19 +1801,28 @@ hfs_remove_orphans(struct hfsmount * hfsmp) struct FSBufferDescriptor btdata; struct HFSPlusCatalogFile filerec; struct HFSPlusCatalogKey * keyp; + struct proc *p = current_proc(); FCB *fcb; ExtendedVCB *vcb; char filename[32]; char tempname[32]; size_t namelen; + cat_cookie_t cookie; int catlock = 0; - int result, started_tr = 0; - - if (hfsmp->hfs_orphans_cleaned) + int catreserve = 0; + bool started_tr = false; + int lockflags; + int result; + int orphaned_files = 0; + int orphaned_dirs = 0; + + bzero(&cookie, sizeof(cookie)); + + if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS) return; vcb = HFSTOVCB(hfsmp); - fcb = VTOF(vcb->catalogRefNum); + fcb = VTOF(hfsmp->hfs_catalog_vp); btdata.bufferAddress = &filerec; btdata.itemSize = sizeof(filerec); @@ -933,123 +1830,228 @@ hfs_remove_orphans(struct hfsmount * hfsmp) MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); bzero(iterator, sizeof(*iterator)); + + /* Build a key to "temp" */ keyp = (HFSPlusCatalogKey*)&iterator->key; - keyp->parentID = hfsmp->hfs_private_metadata_dir; - - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - return; - } - started_tr = 1; - } - - /* Lock catalog b-tree */ - result = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, current_proc()); - if (result) - goto exit; - catlock = 1; + keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + keyp->nodeName.length = 4; /* "temp" */ + keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2; + keyp->nodeName.unicode[0] = 't'; + keyp->nodeName.unicode[1] = 'e'; + keyp->nodeName.unicode[2] = 'm'; + keyp->nodeName.unicode[3] = 'p'; /* - * Position the iterator at the folder thread record. - * (i.e. one record before first child) + * Position the iterator just before the first real temp file/dir. */ - result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator); - if (result) - goto exit; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator); + hfs_systemfile_unlock(hfsmp, lockflags); - /* Visit all the children in the HFS+ private directory. */ + /* Visit all the temp files/dirs in the HFS+ private directory. */ for (;;) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); if (result) break; - if (keyp->parentID != hfsmp->hfs_private_metadata_dir) + if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) break; - if (filerec.recordType != kHFSPlusFileRecord) - continue; (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2, - filename, &namelen, sizeof(filename), 0, 0); + (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0); - (void) sprintf(tempname, "%s%d", HFS_DELETE_PREFIX, filerec.fileID); + (void) snprintf(tempname, sizeof(tempname), "%s%d", + HFS_DELETE_PREFIX, filerec.fileID); /* - * Delete all files named "tempxxx", where - * xxx is the file's cnid in decimal. + * Delete all files (and directories) named "tempxxx", + * where xxx is the file's cnid in decimal. * - * Delete all files named "iNodexxx", that - * have a link count of zero. */ - if (bcmp(tempname, filename, namelen) == 0) { - struct filefork fork = {0}; - struct cnode cnode = {0}; - - // XXXdebug - //printf("hfs_remove_orphans: removing %s\n", filename); - - /* Build a fake cnode */ - cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_metadata_dir; - cnode.c_desc.cd_nameptr = filename; - cnode.c_desc.cd_namelen = namelen; - cnode.c_desc.cd_cnid = filerec.fileID; - cnode.c_attr.ca_fileid = filerec.fileID; - cnode.c_blocks = filerec.dataFork.totalBlocks + - filerec.resourceFork.totalBlocks; - - /* Position iterator at previous entry */ - if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, - NULL, NULL) != 0) - break; + if (bcmp(tempname, filename, namelen) != 0) + continue; + + struct filefork dfork; + struct filefork rfork; + struct cnode cnode; + int mode = 0; + + bzero(&dfork, sizeof(dfork)); + bzero(&rfork, sizeof(rfork)); + bzero(&cnode, sizeof(cnode)); - /* Truncate the file to zero (both forks) */ - if (filerec.dataFork.totalBlocks > 0) { - fork.ff_cp = &cnode; - cnode.c_datafork = ⋔ - bcopy(&filerec.dataFork, &fork.ff_data, sizeof(struct cat_fork)); - if (TruncateFileC(vcb, (FCB*)&fork, 0, false) != 0) { - printf("error truncting data fork!\n"); - break; + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; + + /* + * Reserve some space in the Catalog file. + */ + if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { + printf("hfs_remove_orphans: cat_preflight failed\n"); + goto exit; + } + catreserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + + /* Build a fake cnode */ + cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr, + &dfork.ff_data, &rfork.ff_data); + cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + cnode.c_desc.cd_nameptr = (const u_int8_t *)filename; + cnode.c_desc.cd_namelen = namelen; + cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid; + cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks; + + /* Position iterator at previous entry */ + if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, + NULL, NULL) != 0) { + break; + } + + /* Truncate the file to zero (both forks) */ + if (dfork.ff_blocks > 0) { + u_int64_t fsize; + + dfork.ff_cp = &cnode; + cnode.c_datafork = &dfork; + cnode.c_rsrcfork = NULL; + fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; + while (fsize > 0) { + if (fsize > HFS_BIGFILE_SIZE) { + fsize -= HFS_BIGFILE_SIZE; + } else { + fsize = 0; } - } - if (filerec.resourceFork.totalBlocks > 0) { - fork.ff_cp = &cnode; - cnode.c_datafork = NULL; - cnode.c_rsrcfork = ⋔ - bcopy(&filerec.resourceFork, &fork.ff_data, sizeof(struct cat_fork)); - if (TruncateFileC(vcb, (FCB*)&fork, 0, false) != 0) { - printf("error truncting rsrc fork!\n"); + + if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, + cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating data fork!\n"); break; } + + // + // if we're iteratively truncating this file down, + // then end the transaction and start a new one so + // that no one transaction gets too big. + // + if (fsize > 0) { + /* Drop system file locks before starting + * another transaction to preserve lock order. + */ + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = 0; + hfs_end_transaction(hfsmp); + + if (hfs_start_transaction(hfsmp) != 0) { + started_tr = false; + goto exit; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } } + } - /* Remove the file record from the Catalog */ - if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { - printf("error deleting cat rec!\n"); + if (rfork.ff_blocks > 0) { + rfork.ff_cp = &cnode; + cnode.c_datafork = NULL; + cnode.c_rsrcfork = &rfork; + if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating rsrc fork!\n"); break; } - - /* Update parent and volume counts */ - hfsmp->hfs_privdir_attr.ca_entries--; - (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, NULL, NULL); - hfs_volupdate(hfsmp, VOL_RMFILE, 0); } - } - + + // Deal with extended attributes + if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) { + // hfs_removeallattr uses its own transactions + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = false; + hfs_end_transaction(hfsmp); + + hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr); + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } + + /* Remove the file or folder record from the Catalog */ + if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { + printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = 0; + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + break; + } + + mode = cnode.c_attr.ca_mode & S_IFMT; + + if (mode == S_IFDIR) { + orphaned_dirs++; + } + else { + orphaned_files++; + } + + /* Update parent and volume counts */ + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (mode == S_IFDIR) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + + /* Drop locks and end the transaction */ + hfs_systemfile_unlock(hfsmp, lockflags); + cat_postflight(hfsmp, &cookie, p); + catlock = catreserve = 0; + + /* + Now that Catalog is unlocked, update the volume info, making + sure to differentiate between files and directories + */ + if (mode == S_IFDIR) { + hfs_volupdate(hfsmp, VOL_RMDIR, 0); + } + else{ + hfs_volupdate(hfsmp, VOL_RMFILE, 0); + } + + hfs_end_transaction(hfsmp); + started_tr = false; + } /* end for */ + exit: - /* Unlock catalog b-tree */ - if (catlock) - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, current_proc()); + if (orphaned_files > 0 || orphaned_dirs > 0) + printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs); + if (catlock) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (catreserve) { + cat_postflight(hfsmp, &cookie, p); + } if (started_tr) { - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); } - hfs_global_shared_lock_release(hfsmp); FREE(iterator, M_TEMP); - hfsmp->hfs_orphans_cleaned = 1; + hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS; } @@ -1068,7 +2070,7 @@ u_int32_t logBlockSize; /* start with default */ logBlockSize = VTOHFS(vp)->hfs_logBlockSize; - if (vp->v_flag & VSYSTEM) { + if (vnode_issystem(vp)) { if (VTOF(vp)->fcbBTCBPtr != NULL) { BTreeInfoRec bTreeInfo; @@ -1094,22 +2096,118 @@ u_int32_t logBlockSize; return logBlockSize; } -__private_extern__ +#if HFS_SPARSE_DEV +static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks) +{ + struct vfsstatfs *vfsp; /* 272 bytes */ + uint64_t vfreeblks; + struct timeval now; + + hfs_lock_mount(hfsmp); + + vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp; + if (!backing_vp) { + hfs_unlock_mount(hfsmp); + return false; + } + + // usecount is not enough; we need iocount + if (vnode_get(backing_vp)) { + hfs_unlock_mount(hfsmp); + *pfree_blks = 0; + return true; + } + + uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; + uint32_t bandblks = hfsmp->hfs_sparsebandblks; + uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks; + + hfs_unlock_mount(hfsmp); + + mount_t backingfs_mp = vnode_mount(backing_vp); + + microtime(&now); + if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) { + vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT); + hfsmp->hfs_last_backingstatfs = now.tv_sec; + } + + if (!(vfsp = vfs_statfs(backingfs_mp))) { + vnode_put(backing_vp); + return false; + } + + vfreeblks = vfsp->f_bavail; + /* Normalize block count if needed. */ + if (vfsp->f_bsize != hfsmp->blockSize) + vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize; + if (vfreeblks > bandblks) + vfreeblks -= bandblks; + else + vfreeblks = 0; + + /* + * Take into account any delayed allocations. It is not + * certain what the original reason for the "2 *" is. Most + * likely it is to allow for additional requirements in the + * host file system and metadata required by disk images. The + * number of loaned blocks is likely to be small and we will + * stop using them as we get close to the limit. + */ + loanedblks = 2 * loanedblks; + if (vfreeblks > loanedblks) + vfreeblks -= loanedblks; + else + vfreeblks = 0; + + if (maxblks) + vfreeblks = MIN(vfreeblks, maxblks); + + vnode_put(backing_vp); + + *pfree_blks = vfreeblks; + + return true; +} +#endif + u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) { - struct vcb_t *vcb = HFSTOVCB(hfsmp); u_int32_t freeblks; + u_int32_t rsrvblks; + u_int32_t loanblks; - freeblks = vcb->freeBlocks; + /* + * We don't bother taking the mount lock + * to look at these values since the values + * themselves are each updated atomically + * on aligned addresses. + */ + freeblks = hfsmp->freeBlocks; + rsrvblks = hfsmp->reserveBlocks; + loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; if (wantreserve) { - if (freeblks > vcb->reserveBlocks) - freeblks -= vcb->reserveBlocks; + if (freeblks > rsrvblks) + freeblks -= rsrvblks; else freeblks = 0; } + if (freeblks > loanblks) + freeblks -= loanblks; + else + freeblks = 0; + +#if HFS_SPARSE_DEV + /* + * When the underlying device is sparse, check the + * available space on the backing store volume. + */ + uint64_t vfreeblks; + if (hfs_get_backing_free_blks(hfsmp, &vfreeblks)) + freeblks = MIN(freeblks, vfreeblks); +#endif /* HFS_SPARSE_DEV */ - freeblks -= vcb->loanedBlocks; return (freeblks); } @@ -1122,16 +2220,21 @@ short MacToVFSError(OSErr err) if (err >= 0) return err; + /* BSD/VFS internal errnos */ + switch (err) { + case ERESERVEDNAME: /* -8 */ + return err; + } + switch (err) { case dskFulErr: /* -34 */ + case btNoSpaceAvail: /* -32733 */ return ENOSPC; - case btNoSpaceAvail: /* -32733 */ - return EFBIG; case fxOvFlErr: /* -32750 */ return EOVERFLOW; case btBadNode: /* -32731 */ - return EBADF; + return EIO; case memFullErr: /* -108 */ return ENOMEM; /* +12 */ @@ -1170,96 +2273,161 @@ short MacToVFSError(OSErr err) /* - * Get the directory entry name hint for a given index. - * The directory cnode (dcp) must be locked. + * Find the current thread's directory hint for a given index. + * + * Requires an exclusive lock on directory cnode. + * + * Use detach if the cnode lock must be dropped while the hint is still active. */ __private_extern__ -char * -hfs_getnamehint(struct cnode *dcp, int index) +directoryhint_t * +hfs_getdirhint(struct cnode *dcp, int index, int detach) { - struct hfs_index *entry; - void *self; + struct timeval tv; + directoryhint_t *hint; + boolean_t need_remove, need_init; + const u_int8_t * name; - if (index > 0) { - self = current_thread(); - SLIST_FOREACH(entry, &dcp->c_indexlist, hi_link) { - if ((entry->hi_index == index) - && (entry->hi_thread == self)) - return (entry->hi_name); + microuptime(&tv); + + /* + * Look for an existing hint first. If not found, create a new one (when + * the list is not full) or recycle the oldest hint. Since new hints are + * always added to the head of the list, the last hint is always the + * oldest. + */ + TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) { + if (hint->dh_index == index) + break; + } + if (hint != NULL) { /* found an existing hint */ + need_init = false; + need_remove = true; + } else { /* cannot find an existing hint */ + need_init = true; + if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */ + /* Create a default directory hint */ + MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK); + ++dcp->c_dirhintcnt; + need_remove = false; + } else { /* recycle the last (i.e., the oldest) hint */ + hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); + if ((hint->dh_desc.cd_flags & CD_HASBUF) && + (name = hint->dh_desc.cd_nameptr)) { + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); + } + need_remove = true; } } - return (NULL); + if (need_remove) + TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link); + + if (detach) + --dcp->c_dirhintcnt; + else + TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link); + + if (need_init) { + hint->dh_index = index; + hint->dh_desc.cd_flags = 0; + hint->dh_desc.cd_encoding = 0; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_parentcnid = dcp->c_fileid; + hint->dh_desc.cd_hint = dcp->c_childhint; + hint->dh_desc.cd_cnid = 0; + } + hint->dh_time = tv.tv_sec; + return (hint); } /* - * Save a directory entry name hint for a given index. - * The directory cnode (dcp) must be locked. + * Release a single directory hint. + * + * Requires an exclusive lock on directory cnode. */ __private_extern__ void -hfs_savenamehint(struct cnode *dcp, int index, const char * namehint) +hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint) { - struct hfs_index *entry; - int len; - - if (index > 0) { - len = strlen(namehint); - MALLOC(entry, struct hfs_index *, len + sizeof(struct hfs_index), - M_TEMP, M_WAITOK); - entry->hi_index = index; - entry->hi_thread = current_thread(); - bcopy(namehint, entry->hi_name, len + 1); - SLIST_INSERT_HEAD(&dcp->c_indexlist, entry, hi_link); + const u_int8_t * name; + directoryhint_t *hint; + + /* Check if item is on list (could be detached) */ + TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) { + if (hint == relhint) { + TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link); + --dcp->c_dirhintcnt; + break; + } } + name = relhint->dh_desc.cd_nameptr; + if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) { + relhint->dh_desc.cd_nameptr = NULL; + relhint->dh_desc.cd_namelen = 0; + relhint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); + } + FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT); } /* - * Release the directory entry name hint for a given index. - * The directory cnode (dcp) must be locked. + * Release directory hints for given directory + * + * Requires an exclusive lock on directory cnode. */ __private_extern__ void -hfs_relnamehint(struct cnode *dcp, int index) +hfs_reldirhints(struct cnode *dcp, int stale_hints_only) { - struct hfs_index *entry; - void *self; - - if (index > 0) { - self = current_thread(); - SLIST_FOREACH(entry, &dcp->c_indexlist, hi_link) { - if ((entry->hi_index == index) - && (entry->hi_thread == self)) { - SLIST_REMOVE(&dcp->c_indexlist, entry, hfs_index, - hi_link); - FREE(entry, M_TEMP); - break; - } + struct timeval tv; + directoryhint_t *hint, *prev; + const u_int8_t * name; + + if (stale_hints_only) + microuptime(&tv); + + /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */ + for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) { + if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL) + break; /* stop here if this entry is too new */ + name = hint->dh_desc.cd_nameptr; + if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) { + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); } + prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */ + TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link); + FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT); + --dcp->c_dirhintcnt; } } /* - * Release all directory entry name hints. + * Insert a detached directory hint back into the list of dirhints. + * + * Requires an exclusive lock on directory cnode. */ __private_extern__ void -hfs_relnamehints(struct cnode *dcp) +hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint) { - struct hfs_index *entry; - struct hfs_index *next; + directoryhint_t *test; - if (!SLIST_EMPTY(&dcp->c_indexlist)) { - for(entry = SLIST_FIRST(&dcp->c_indexlist); - entry != NULL; - entry = next) { - next = SLIST_NEXT(entry, hi_link); - SLIST_REMOVE(&dcp->c_indexlist, entry, hfs_index, hi_link); - FREE(entry, M_TEMP); - } + TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) { + if (test == hint) + panic("hfs_insertdirhint: hint %p already on list!", hint); } -} + TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link); + ++dcp->c_dirhintcnt; +} /* * Perform a case-insensitive compare of two UTF-8 filenames. @@ -1268,7 +2436,7 @@ hfs_relnamehints(struct cnode *dcp) */ __private_extern__ int -hfs_namecmp(const char *str1, size_t len1, const char *str2, size_t len2) +hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2) { u_int16_t *ustr1, *ustr2; size_t ulen1, ulen2; @@ -1294,20 +2462,149 @@ out: } -__private_extern__ +typedef struct jopen_cb_info { + off_t jsize; + char *desired_uuid; + struct vnode *jvp; + size_t blksize; + int need_clean; + int need_init; +} jopen_cb_info; + +static int +journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg) +{ + struct nameidata nd; + jopen_cb_info *ji = (jopen_cb_info *)arg; + char bsd_name[256]; + int error; + + strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name)); + strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5); + + if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) { + return 1; // keep iterating + } + + // if we're here, either the desired uuid matched or there was no + // desired uuid so let's try to open the device for writing and + // see if it works. if it does, we'll use it. + + NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel()); + if ((error = namei(&nd))) { + printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str); + return 1; // keep iterating + } + + ji->jvp = nd.ni_vp; + nameidone(&nd); + + if (ji->jvp == NULL) { + printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error); + } else { + error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel()); + if (error == 0) { + // if the journal is dirty and we didn't specify a desired + // journal device uuid, then do not use the journal. but + // if the journal is just invalid (e.g. it hasn't been + // initialized) then just set the need_init flag. + if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') { + error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize); + if (error == EBUSY) { + VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel()); + vnode_put(ji->jvp); + ji->jvp = NULL; + return 1; // keep iterating + } else if (error == EINVAL) { + ji->need_init = 1; + } + } + + if (ji->desired_uuid && ji->desired_uuid[0] == '\0') { + strlcpy(ji->desired_uuid, uuid_str, 128); + } + vnode_setmountedon(ji->jvp); + return 0; // stop iterating + } else { + vnode_put(ji->jvp); + ji->jvp = NULL; + } + } + + return 1; // keep iterating +} + +extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg); +kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len); + + +static vnode_t +open_journal_dev(const char *vol_device, + int need_clean, + char *uuid_str, + char *machine_serial_num, + off_t jsize, + size_t blksize, + int *need_init) +{ + int retry_counter=0; + jopen_cb_info ji; + + ji.jsize = jsize; + ji.desired_uuid = uuid_str; + ji.jvp = NULL; + ji.blksize = blksize; + ji.need_clean = need_clean; + ji.need_init = 0; + +// if (uuid_str[0] == '\0') { +// printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device); +// } else { +// printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str); +// } + while (ji.jvp == NULL && retry_counter++ < 4) { + if (retry_counter > 1) { + if (uuid_str[0]) { + printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str); + } else { + printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n"); + } + delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again + } + + IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji); + } + + if (ji.jvp == NULL) { + printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n", + vol_device, uuid_str, machine_serial_num); + } + + *need_init = ji.need_init; + + return ji.jvp; +} + + int hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, - void *_args, int embeddedOffset, int mdb_offset, - HFSMasterDirectoryBlock *mdbp, struct ucred *cred) + void *_args, off_t embeddedOffset, daddr64_t mdb_offset, + HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred) { JournalInfoBlock *jibp; struct buf *jinfo_bp, *bp; int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0; - int retval, blksize = hfsmp->hfs_phys_block_size; + int retval, write_jibp = 0; + uint32_t blksize = hfsmp->hfs_logical_block_size; struct vnode *devvp; struct hfs_mount_args *args = _args; - + u_int32_t jib_flags; + u_int64_t jib_offset; + u_int64_t jib_size; + const char *dev_name; + devvp = hfsmp->hfs_devvp; + dev_name = vnode_getname_printable(devvp); if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) { arg_flags = args->journal_flags; @@ -1316,95 +2613,171 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize; - retval = meta_bread(devvp, - embeddedOffset/blksize + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock), - SWAP_BE32(vhp->blockSize), cred, &jinfo_bp); - if (retval) - return retval; - - jibp = (JournalInfoBlock *)jinfo_bp->b_data; - jibp->flags = SWAP_BE32(jibp->flags); - jibp->offset = SWAP_BE64(jibp->offset); - jibp->size = SWAP_BE64(jibp->size); + jinfo_bp = NULL; + retval = (int)buf_meta_bread(devvp, + (daddr64_t)((embeddedOffset/blksize) + + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, cred, &jinfo_bp); + if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } + goto cleanup_dev_name; + } + + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); + jib_flags = SWAP_BE32(jibp->flags); + jib_size = SWAP_BE64(jibp->size); - if (jibp->flags & kJIJournalInFSMask) { + if (jib_flags & kJIJournalInFSMask) { hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset = SWAP_BE64(jibp->offset); } else { - printf("hfs: journal not stored in fs! don't know what to do.\n"); - brelse(jinfo_bp); - return EINVAL; + int need_init=0; + + // if the volume was unmounted cleanly then we'll pick any + // available external journal partition + // + if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) { + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + } + + hfsmp->jvp = open_journal_dev(dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { + buf_brelse(jinfo_bp); + retval = EROFS; + goto cleanup_dev_name; + } else { + if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num)); + } + } + + jib_offset = 0; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } } // save this off for the hack-y check in hfs_remove() - hfsmp->jnl_start = jibp->offset / SWAP_BE32(vhp->blockSize); + hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize); + hfsmp->jnl_size = jib_size; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) { + // if the file system is read-only, check if the journal is empty. + // if it is, then we can allow the mount. otherwise we have to + // return failure. + retval = journal_is_clean(hfsmp->jvp, + jib_offset + embeddedOffset, + jib_size, + devvp, + hfsmp->hfs_logical_block_size); + + hfsmp->jnl = NULL; + + buf_brelse(jinfo_bp); + + if (retval) { + const char *name = vnode_getname_printable(devvp); + printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); + } + + goto cleanup_dev_name; + } - if (jibp->flags & kJIJournalNeedInitMask) { + if (jib_flags & kJIJournalNeedInitMask) { printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", - jibp->offset + (off_t)embeddedOffset, jibp->size); + jib_offset + embeddedOffset, jib_size); hfsmp->jnl = journal_create(hfsmp->jvp, - jibp->offset + (off_t)embeddedOffset, - jibp->size, + jib_offset + embeddedOffset, + jib_size, devvp, blksize, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); // no need to start a transaction here... if this were to fail // we'd just re-init it on the next mount. - jibp->flags &= ~kJIJournalNeedInitMask; - jibp->flags = SWAP_BE32(jibp->flags); - bwrite(jinfo_bp); + jib_flags &= ~kJIJournalNeedInitMask; + jibp->flags = SWAP_BE32(jib_flags); + buf_bwrite(jinfo_bp); jinfo_bp = NULL; jibp = NULL; } else { //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", - // jibp->offset + (off_t)embeddedOffset, - // jibp->size, SWAP_BE32(vhp->blockSize)); + // jib_offset + embeddedOffset, + // jib_size, SWAP_BE32(vhp->blockSize)); hfsmp->jnl = journal_open(hfsmp->jvp, - jibp->offset + (off_t)embeddedOffset, - jibp->size, + jib_offset + embeddedOffset, + jib_size, devvp, blksize, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); - - brelse(jinfo_bp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); + + if (write_jibp) { + buf_bwrite(jinfo_bp); + } else { + buf_brelse(jinfo_bp); + } jinfo_bp = NULL; jibp = NULL; if (hfsmp->jnl && mdbp) { // reload the mdb because it could have changed // if the journal had to be replayed. - retval = meta_bread(devvp, mdb_offset, blksize, cred, &bp); + if (mdb_offset == 0) { + mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); + } + bp = NULL; + retval = (int)buf_meta_bread(devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, cred, &bp); if (retval) { - brelse(bp); + if (bp) { + buf_brelse(bp); + } printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n", retval); - return retval; + goto cleanup_dev_name; } - bcopy(bp->b_data + HFS_PRI_OFFSET(blksize), mdbp, 512); - brelse(bp); + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512); + buf_brelse(bp); bp = NULL; } } - - //printf("journal @ 0x%x\n", hfsmp->jnl); - // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. if (hfsmp->jnl == NULL) { - hfsmp->jnl_start = 0; - - printf("hfs: failed to open/create the journal (retval %d).\n", retval); - return EINVAL; + printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval); + retval = EINVAL; + goto cleanup_dev_name; } - return 0; + retval = 0; + +cleanup_dev_name: + vnode_putname_printable(dev_name); + return retval; } @@ -1426,15 +2799,18 @@ static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args) { JournalInfoBlock *jibp; - struct buf *jinfo_bp, *bp; + struct buf *jinfo_bp; int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0; - int retval, need_flush = 0, write_jibp = 0; + int retval, write_jibp = 0, recreate_journal = 0; struct vnode *devvp; struct cat_attr jib_attr, jattr; struct cat_fork jib_fork, jfork; ExtendedVCB *vcb; - u_long fid; + u_int32_t fid; struct hfs_mount_args *args = _args; + u_int32_t jib_flags; + u_int64_t jib_offset; + u_int64_t jib_size; devvp = hfsmp->hfs_devvp; vcb = HFSTOVCB(hfsmp); @@ -1464,79 +2840,144 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock; vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock); + recreate_journal = 1; } - sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_phys_block_size; - retval = meta_bread(devvp, - vcb->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock), - SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp); + sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size; + jinfo_bp = NULL; + retval = (int)buf_meta_bread(devvp, + (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp); if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } printf("hfs: can't read journal info block. disabling journaling.\n"); vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; return 0; } - jibp = (JournalInfoBlock *)jinfo_bp->b_data; - jibp->flags = SWAP_BE32(jibp->flags); - jibp->offset = SWAP_BE64(jibp->offset); - jibp->size = SWAP_BE64(jibp->size); + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); + jib_flags = SWAP_BE32(jibp->flags); + jib_offset = SWAP_BE64(jibp->offset); + jib_size = SWAP_BE64(jibp->size); fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork); if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) { printf("hfs: can't find the journal file! disabling journaling (start: %d)\n", jfork.cf_extents[0].startBlock); - brelse(jinfo_bp); + buf_brelse(jinfo_bp); vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; return 0; } hfsmp->hfs_jnlfileid = fid; // make sure the journal file begins where we think it should. - if ((jibp->offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { + if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n", - (jibp->offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock); + (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock); - jibp->offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize; + jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize; write_jibp = 1; + recreate_journal = 1; } // check the size of the journal file. - if (jibp->size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) { + if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) { printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n", - jibp->size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize); + jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize); - jibp->size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize; + jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize; write_jibp = 1; + recreate_journal = 1; } - if (jibp->flags & kJIJournalInFSMask) { + if (jib_flags & kJIJournalInFSMask) { hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset += (off_t)vcb->hfsPlusIOPosOffset; } else { - printf("hfs: journal not stored in fs! don't know what to do.\n"); - brelse(jinfo_bp); - return EINVAL; + const char *dev_name; + int need_init = 0; + + dev_name = vnode_getname_printable(devvp); + + // since the journal is empty, just use any available external journal + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + + // this fills in the uuid of the device we actually get + hfsmp->jvp = open_journal_dev(dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { + buf_brelse(jinfo_bp); + vnode_putname_printable(dev_name); + return EROFS; + } else { + if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num)); + } + } + jib_offset = 0; + recreate_journal = 1; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } + vnode_putname_printable(dev_name); } // save this off for the hack-y check in hfs_remove() - hfsmp->jnl_start = jibp->offset / SWAP_BE32(vhp->blockSize); + hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize); + hfsmp->jnl_size = jib_size; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) { + // if the file system is read-only, check if the journal is empty. + // if it is, then we can allow the mount. otherwise we have to + // return failure. + retval = journal_is_clean(hfsmp->jvp, + jib_offset, + jib_size, + devvp, + hfsmp->hfs_logical_block_size); + + hfsmp->jnl = NULL; + + buf_brelse(jinfo_bp); + + if (retval) { + const char *name = vnode_getname_printable(devvp); + printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); + } + + return retval; + } - if (jibp->flags & kJIJournalNeedInitMask) { + if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) { printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", - jibp->offset + (off_t)vcb->hfsPlusIOPosOffset, jibp->size); + jib_offset, jib_size); hfsmp->jnl = journal_create(hfsmp->jvp, - jibp->offset + (off_t)vcb->hfsPlusIOPosOffset, - jibp->size, + jib_offset, + jib_size, devvp, - hfsmp->hfs_phys_block_size, + hfsmp->hfs_logical_block_size, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); // no need to start a transaction here... if this were to fail // we'd just re-init it on the next mount. - jibp->flags &= ~kJIJournalNeedInitMask; + jib_flags &= ~kJIJournalNeedInitMask; write_jibp = 1; } else { @@ -1551,42 +2992,1044 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a arg_flags |= JOURNAL_RESET; //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", - // jibp->offset + (off_t)vcb->hfsPlusIOPosOffset, - // jibp->size, SWAP_BE32(vhp->blockSize)); + // jib_offset, + // jib_size, SWAP_BE32(vhp->blockSize)); hfsmp->jnl = journal_open(hfsmp->jvp, - jibp->offset + (off_t)vcb->hfsPlusIOPosOffset, - jibp->size, + jib_offset, + jib_size, devvp, - hfsmp->hfs_phys_block_size, + hfsmp->hfs_logical_block_size, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); } if (write_jibp) { - jibp->flags = SWAP_BE32(jibp->flags); - jibp->offset = SWAP_BE64(jibp->offset); - jibp->size = SWAP_BE64(jibp->size); + jibp->flags = SWAP_BE32(jib_flags); + jibp->offset = SWAP_BE64(jib_offset); + jibp->size = SWAP_BE64(jib_size); - bwrite(jinfo_bp); + buf_bwrite(jinfo_bp); } else { - brelse(jinfo_bp); + buf_brelse(jinfo_bp); } jinfo_bp = NULL; jibp = NULL; - //printf("journal @ 0x%x\n", hfsmp->jnl); - // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. if (hfsmp->jnl == NULL) { - hfsmp->jnl_start = 0; - - printf("hfs: failed to open/create the journal (retval %d).\n", retval); + printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval); return EINVAL; } return 0; } + +/* + * Calculate the allocation zone for metadata. + * + * This zone includes the following: + * Allocation Bitmap file + * Overflow Extents file + * Journal file + * Quota files + * Clustered Hot files + * Catalog file + * + * METADATA ALLOCATION ZONE + * ____________________________________________________________________________ + * | | | | | | | + * | BM | JF | OEF | CATALOG |---> | HOT FILES | + * |____|____|_____|_______________|______________________________|___________| + * + * <------------------------------- N * 128 MB -------------------------------> + * + */ +#define GIGABYTE (u_int64_t)(1024*1024*1024) + +#define OVERFLOW_DEFAULT_SIZE (4*1024*1024) +#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024) +#define JOURNAL_DEFAULT_SIZE (8*1024*1024) +#define JOURNAL_MAXIMUM_SIZE (512*1024*1024) +#define HOTBAND_MINIMUM_SIZE (10*1024*1024) +#define HOTBAND_MAXIMUM_SIZE (512*1024*1024) + +/* Initialize the metadata zone. + * + * If the size of the volume is less than the minimum size for + * metadata zone, metadata zone is disabled. + * + * If disable is true, disable metadata zone unconditionally. + */ +void +hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) +{ + ExtendedVCB *vcb; + u_int64_t fs_size; + u_int64_t zonesize; + u_int64_t temp; + u_int64_t filesize; + u_int32_t blk; + int items, really_do_it=1; + + vcb = HFSTOVCB(hfsmp); + fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit; + + /* + * For volumes less than 10 GB, don't bother. + */ + if (fs_size < ((u_int64_t)10 * GIGABYTE)) { + really_do_it = 0; + } + + /* + * Skip non-journaled volumes as well. + */ + if (hfsmp->jnl == NULL) { + really_do_it = 0; + } + + /* If caller wants to disable metadata zone, do it */ + if (disable == true) { + really_do_it = 0; + } + + /* + * Start with space for the boot blocks and Volume Header. + * 1536 = byte offset from start of volume to end of volume header: + * 1024 bytes is the offset from the start of the volume to the + * start of the volume header (defined by the volume format) + * + 512 bytes (the size of the volume header). + */ + zonesize = roundup(1536, hfsmp->blockSize); + + /* + * Add the on-disk size of allocation bitmap. + */ + zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize; + + /* + * Add space for the Journal Info Block and Journal (if they're in + * this file system). + */ + if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) { + zonesize += hfsmp->blockSize + hfsmp->jnl_size; + } + + /* + * Add the existing size of the Extents Overflow B-tree. + * (It rarely grows, so don't bother reserving additional room for it.) + */ + zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize); + + /* + * If there is an Attributes B-tree, leave room for 11 clumps worth. + * newfs_hfs allocates one clump, and leaves a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. + */ + if (hfsmp->hfs_attribute_cp) { + zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize; + } + + /* + * Leave room for 11 clumps of the Catalog B-tree. + * Again, newfs_hfs allocates one clump plus a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. + */ + zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize; + + /* + * Add space for hot file region. + * + * ...for now, use 5 MB per 1 GB (0.5 %) + */ + filesize = (fs_size / 1024) * 5; + if (filesize > HOTBAND_MAXIMUM_SIZE) + filesize = HOTBAND_MAXIMUM_SIZE; + else if (filesize < HOTBAND_MINIMUM_SIZE) + filesize = HOTBAND_MINIMUM_SIZE; + /* + * Calculate user quota file requirements. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + items = QF_USERS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_USERS) + items = QF_MIN_USERS; + else if (items > QF_MAX_USERS) + items = QF_MAX_USERS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } + } + filesize += (items + 1) * sizeof(struct dqblk); + /* + * Calculate group quota file requirements. + * + */ + items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_GROUPS) + items = QF_MIN_GROUPS; + else if (items > QF_MAX_GROUPS) + items = QF_MAX_GROUPS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } + } + filesize += (items + 1) * sizeof(struct dqblk); + } + zonesize += filesize; + + /* + * Round up entire zone to a bitmap block's worth. + * The extra space goes to the catalog file and hot file area. + */ + temp = zonesize; + zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize); + hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize; + /* + * If doing the round up for hfs_min_alloc_start would push us past + * allocLimit, then just reset it back to 0. Though using a value + * bigger than allocLimit would not cause damage in the block allocator + * code, this value could get stored in the volume header and make it out + * to disk, making the volume header technically corrupt. + */ + if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) { + hfsmp->hfs_min_alloc_start = 0; + } + + if (really_do_it == 0) { + /* If metadata zone needs to be disabled because the + * volume was truncated, clear the bit and zero out + * the values that are no longer needed. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* Disable metadata zone */ + hfsmp->hfs_flags &= ~HFS_METADATA_ZONE; + + /* Zero out mount point values that are not required */ + hfsmp->hfs_catalog_maxblks = 0; + hfsmp->hfs_hotfile_maxblks = 0; + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + hfsmp->hfs_metazone_start = 0; + hfsmp->hfs_metazone_end = 0; + } + + return; + } + + temp = zonesize - temp; /* temp has extra space */ + filesize += temp / 3; + hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize); + } else { + hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + } + + /* Convert to allocation blocks. */ + blk = zonesize / vcb->blockSize; + + /* The default metadata zone location is at the start of volume. */ + hfsmp->hfs_metazone_start = 1; + hfsmp->hfs_metazone_end = blk - 1; + + /* The default hotfile area is at the end of the zone. */ + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) { + hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize); + hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; + hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); + } + else { + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + } +#if DEBUG + printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); + printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); + printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks); +#endif + + hfsmp->hfs_flags |= HFS_METADATA_ZONE; +} + + +static u_int32_t +hfs_hotfile_freeblocks(struct hfsmount *hfsmp) +{ + ExtendedVCB *vcb = HFSTOVCB(hfsmp); + int lockflags; + int freeblocks; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // This is only used at initialization time and on an ssd + // we'll get the real info from the hotfile btree user + // info + // + return 0; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + freeblocks = MetaZoneFreeBlocks(vcb); + hfs_systemfile_unlock(hfsmp, lockflags); + + /* Minus Extents overflow file reserve. */ + if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; + } + + /* Minus catalog file reserve. */ + if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; + } + + if (freeblocks < 0) + freeblocks = 0; + + // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks)); + return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks); +} + +/* + * Determine if a file is a "virtual" metadata file. + * This includes journal and quota files. + */ +int +hfs_virtualmetafile(struct cnode *cp) +{ + const char * filename; + + + if (cp->c_parentcnid != kHFSRootFolderID) + return (0); + + filename = (const char *)cp->c_desc.cd_nameptr; + if (filename == NULL) + return (0); + + if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) || + (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) || + (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) || + (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) || + (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0)) + return (1); + + return (0); +} + +__private_extern__ +void hfs_syncer_lock(struct hfsmount *hfsmp) +{ + hfs_lock_mount(hfsmp); +} + +__private_extern__ +void hfs_syncer_unlock(struct hfsmount *hfsmp) +{ + hfs_unlock_mount(hfsmp); +} + +__private_extern__ +void hfs_syncer_wait(struct hfsmount *hfsmp) +{ + msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT, + "hfs_syncer_wait", NULL); +} + +__private_extern__ +void hfs_syncer_wakeup(struct hfsmount *hfsmp) +{ + wakeup(&hfsmp->hfs_sync_incomplete); +} + +__private_extern__ +uint64_t hfs_usecs_to_deadline(uint64_t usecs) +{ + uint64_t deadline; + clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline); + return deadline; +} + +__private_extern__ +void hfs_syncer_queue(thread_call_t syncer) +{ + if (thread_call_enter_delayed_with_leeway(syncer, + NULL, + hfs_usecs_to_deadline(HFS_META_DELAY), + 0, + THREAD_CALL_DELAY_SYS_BACKGROUND)) { + printf("hfs: syncer already scheduled!\n"); + } +} + +// +// Fire off a timed callback to sync the disk if the +// volume is on ejectable media. +// + __private_extern__ +void +hfs_sync_ejectable(struct hfsmount *hfsmp) +{ + // If we don't have a syncer or we get called by the syncer, just return + if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread) + return; + + hfs_syncer_lock(hfsmp); + + if (!timerisset(&hfsmp->hfs_sync_req_oldest)) + microuptime(&hfsmp->hfs_sync_req_oldest); + + /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we + don't want to queue again if there is a sync outstanding. */ + if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) { + hfs_syncer_unlock(hfsmp); + return; + } + + hfsmp->hfs_sync_incomplete = TRUE; + + thread_call_t syncer = hfsmp->hfs_syncer; + + hfs_syncer_unlock(hfsmp); + + hfs_syncer_queue(syncer); +} + +int +hfs_start_transaction(struct hfsmount *hfsmp) +{ + int ret = 0, unlock_on_err = 0; + thread_t thread = current_thread(); + +#ifdef HFS_CHECK_LOCK_ORDER + /* + * You cannot start a transaction while holding a system + * file lock. (unless the transaction is nested.) + */ + if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) { + if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) { + panic("hfs_start_transaction: bad lock order (cat before jnl)\n"); + } + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) { + panic("hfs_start_transaction: bad lock order (attr before jnl)\n"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) { + panic("hfs_start_transaction: bad lock order (ext before jnl)\n"); + } + } +#endif /* HFS_CHECK_LOCK_ORDER */ + +again: + + if (hfsmp->jnl) { + if (journal_owner(hfsmp->jnl) != thread) { + /* + * The global lock should be held shared if journal is + * active to prevent disabling. If we're not the owner + * of the journal lock, verify that we're not already + * holding the global lock exclusive before moving on. + */ + if (hfsmp->hfs_global_lockowner == thread) { + ret = EBUSY; + goto out; + } + + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + // Things could have changed + if (!hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } + } else { + // No journal + if (hfsmp->hfs_global_lockowner != thread) { + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + + // Things could have changed + if (hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } + } + + /* If a downgrade to read-only mount is in progress, no other + * thread than the downgrade thread is allowed to modify + * the file system. + */ + if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) && + hfsmp->hfs_downgrading_thread != thread) { + ret = EROFS; + goto out; + } + + if (hfsmp->jnl) { + ret = journal_start_transaction(hfsmp->jnl); + } else { + ret = 0; + } + + if (ret == 0) + ++hfsmp->hfs_transaction_nesting; + +out: + if (ret != 0 && unlock_on_err) { + hfs_unlock_global (hfsmp); + OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); + } + + return ret; +} + +int +hfs_end_transaction(struct hfsmount *hfsmp) +{ + int ret; + + assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread()); + assert(hfsmp->hfs_transaction_nesting > 0); + + if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1) + hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY); + + bool need_unlock = !--hfsmp->hfs_transaction_nesting; + + if (hfsmp->jnl) { + ret = journal_end_transaction(hfsmp->jnl); + } else { + ret = 0; + } + + if (need_unlock) { + OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); + hfs_unlock_global (hfsmp); + hfs_sync_ejectable(hfsmp); + } + + return ret; +} + + +void +hfs_journal_lock(struct hfsmount *hfsmp) +{ + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + journal_lock(hfsmp->jnl); + } + hfs_unlock_global (hfsmp); +} + +void +hfs_journal_unlock(struct hfsmount *hfsmp) +{ + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + journal_unlock(hfsmp->jnl); + } + hfs_unlock_global (hfsmp); +} + +/* + * Flush the contents of the journal to the disk. + * + * - HFS_FLUSH_JOURNAL + * Wait to write in-memory journal to the disk consistently. + * This means that the journal still contains uncommitted + * transactions and the file system metadata blocks in + * the journal transactions might be written asynchronously + * to the disk. But there is no guarantee that they are + * written to the disk before returning to the caller. + * Note that this option is sufficient for file system + * data integrity as it guarantees consistent journal + * content on the disk. + * + * - HFS_FLUSH_JOURNAL_META + * Wait to write in-memory journal to the disk + * consistently, and also wait to write all asynchronous + * metadata blocks to its corresponding locations + * consistently on the disk. This is overkill in normal + * scenarios but is useful whenever the metadata blocks + * are required to be consistent on-disk instead of + * just the journalbeing consistent; like before live + * verification and live volume resizing. The update of the + * metadata doesn't include a barrier of track cache flush. + * + * - HFS_FLUSH_FULL + * HFS_FLUSH_JOURNAL + force a track cache flush to media + * + * - HFS_FLUSH_CACHE + * Force a track cache flush to media. + * + * - HFS_FLUSH_BARRIER + * Barrier-only flush to ensure write order + * + */ +errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode) +{ + errno_t error = 0; + journal_flush_options_t options = 0; + dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER }; + + switch (mode) { + case HFS_FLUSH_JOURNAL_META: + // wait for journal, metadata blocks and previous async flush to finish + SET(options, JOURNAL_WAIT_FOR_IO); + + // no break + + case HFS_FLUSH_JOURNAL: + case HFS_FLUSH_JOURNAL_BARRIER: + case HFS_FLUSH_FULL: + + if (mode == HFS_FLUSH_JOURNAL_BARRIER && + !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + mode = HFS_FLUSH_FULL; + + if (mode == HFS_FLUSH_FULL) + SET(options, JOURNAL_FLUSH_FULL); + + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + if (hfsmp->jnl) + error = journal_flush(hfsmp->jnl, options); + + hfs_unlock_global (hfsmp); + + /* + * This may result in a double barrier as + * journal_flush may have issued a barrier itself + */ + if (mode == HFS_FLUSH_JOURNAL_BARRIER) + error = VNOP_IOCTL(hfsmp->hfs_devvp, + DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, vfs_context_kernel()); + + break; + + case HFS_FLUSH_CACHE: + // Do a full sync + sync_req.options = 0; + + // no break + + case HFS_FLUSH_BARRIER: + // If barrier only flush doesn't support, fall back to use full flush. + if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + sync_req.options = 0; + + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, vfs_context_kernel()); + break; + + default: + error = EINVAL; + } + + return error; +} + +/* + * hfs_erase_unused_nodes + * + * Check wheter a volume may suffer from unused Catalog B-tree nodes that + * are not zeroed (due to ). If so, just write + * zeroes to the unused nodes. + * + * How do we detect when a volume needs this repair? We can't always be + * certain. If a volume was created after a certain date, then it may have + * been created with the faulty newfs_hfs. Since newfs_hfs only created one + * clump, we can assume that if a Catalog B-tree is larger than its clump size, + * that means that the entire first clump must have been written to, which means + * there shouldn't be unused and unwritten nodes in that first clump, and this + * repair is not needed. + * + * We have defined a bit in the Volume Header's attributes to indicate when the + * unused nodes have been repaired. A newer newfs_hfs will set this bit. + * As will fsck_hfs when it repairs the unused nodes. + */ +int hfs_erase_unused_nodes(struct hfsmount *hfsmp) +{ + int result; + struct filefork *catalog; + int lockflags; + + if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask) + { + /* This volume has already been checked and repaired. */ + return 0; + } + + if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate)) + { + /* This volume is too old to have had the problem. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + catalog = hfsmp->hfs_catalog_cp->c_datafork; + if (catalog->ff_size > catalog->ff_clumpsize) + { + /* The entire first clump must have been in use at some point. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + /* + * If we get here, we need to zero out those unused nodes. + * + * We start a transaction and lock the catalog since we're going to be + * making on-disk changes. But note that BTZeroUnusedNodes doens't actually + * do its writing via the journal, because that would be too much I/O + * to fit in a transaction, and it's a pain to break it up into multiple + * transactions. (It behaves more like growing a B-tree would.) + */ + printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN); + result = hfs_start_transaction(hfsmp); + if (result) + goto done; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + result = BTZeroUnusedNodes(catalog); + vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes"); + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + if (result == 0) + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN); + +done: + return result; +} + + +extern time_t snapshot_timestamp; + +int +check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg) +{ + int snapshot_error = 0; + + if (vp == NULL) { + return 0; + } + + /* Swap files are special; skip them */ + if (vnode_isswap(vp)) { + return 0; + } + + if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) { + // the change time is within this epoch + int error; + + error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg); + if (error == EDEADLK) { + snapshot_error = 0; + } else if (error) { + if (error == EAGAIN) { + printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n"); + } else if (error == EINTR) { + // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n"); + snapshot_error = EINTR; + } + } + } + + if (snapshot_error) return snapshot_error; + + return 0; +} + +int +check_for_dataless_file(struct vnode *vp, uint64_t op_type) +{ + int error; + + if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) { + // there's nothing to do, it's not dataless + return 0; + } + + /* Swap files are special; ignore them */ + if (vnode_isswap(vp)) { + return 0; + } + + // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp); + error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT); + if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) { + error = 0; + } else if (error) { + if (error == EAGAIN) { + printf("hfs: dataless: timed out waiting for namespace handler...\n"); + // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)? + return 0; + } else if (error == EINTR) { + // printf("hfs: dataless: got a signal while waiting for namespace handler...\n"); + return EINTR; + } + } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) { + // + // if we're here, the dataless bit is still set on the file + // which means it didn't get handled. we return an error + // but it's presently ignored by all callers of this function. + // + // XXXdbg - EDATANOTPRESENT is what we really need... + // + return EBADF; + } + + return error; +} + + +// +// NOTE: this function takes care of starting a transaction and +// acquiring the systemfile lock so that it can call +// cat_update(). +// +// NOTE: do NOT hold and cnode locks while calling this function +// to avoid deadlocks (because we take a lock on the root +// cnode) +// +int +hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid) +{ + struct vnode *rvp; + struct cnode *cp; + int error; + + error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel()); + if (error) { + return error; + } + + cp = VTOC(rvp); + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) { + return error; + } + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16)); + + int lockflags; + if ((error = hfs_start_transaction(hfsmp)) != 0) { + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + if (extinfo->document_id == 0) { + // initialize this to start at 3 (one greater than the root-dir id) + extinfo->document_id = 3; + } + + *docid = extinfo->document_id++; + + // mark the root cnode dirty + cp->c_flag |= C_MODIFIED; + hfs_update(cp->c_vp, 0); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + + (void) hfs_unlock(cp); + + vnode_put(rvp); + rvp = NULL; + + return 0; +} + + +/* + * Return information about number of file system allocation blocks + * taken by metadata on a volume. + * + * This function populates struct hfsinfo_metadata with allocation blocks + * used by extents overflow btree, catalog btree, bitmap, attribute btree, + * journal file, and sum of all of the above. + */ +int +hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo) +{ + int lockflags = 0; + int ret_lockflags = 0; + + /* Zero out the output buffer */ + bzero(hinfo, sizeof(struct hfsinfo_metadata)); + + /* + * Getting number of allocation blocks for all btrees + * should be a quick operation, so we grab locks for + * all of them at the same time + */ + lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE; + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + /* + * Make sure that we were able to acquire all locks requested + * to protect us against conditions like unmount in progress. + */ + if ((lockflags & ret_lockflags) != lockflags) { + /* Release any locks that were acquired */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + return EPERM; + } + + /* Get information about all the btrees */ + hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks; + hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks; + hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks; + hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks; + + /* Done with btrees, give up the locks */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + + /* Get information about journal file */ + hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize); + + /* Calculate total number of metadata blocks */ + hinfo->total = hinfo->extents + hinfo->catalog + + hinfo->allocation + hinfo->attribute + + hinfo->journal; + + return 0; +} + +static int +hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs) +{ + vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8"); + + return 0; +} + +__private_extern__ +int hfs_freeze(struct hfsmount *hfsmp) +{ + // First make sure some other process isn't freezing + hfs_lock_mount(hfsmp); + while (hfsmp->hfs_freeze_state != HFS_THAWED) { + if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) { + hfs_unlock_mount(hfsmp); + return EINTR; + } + } + + // Stop new syncers from starting + hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE; + + // Now wait for all syncers to finish + while (hfsmp->hfs_syncers) { + if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) { + hfs_thaw_locked(hfsmp); + hfs_unlock_mount(hfsmp); + return EINTR; + } + } + hfs_unlock_mount(hfsmp); + + // flush things before we get started to try and prevent + // dirty data from being paged out while we're frozen. + // note: we can't do this once we're in the freezing state because + // other threads will need to take the global lock + vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL); + + // Block everything in hfs_lock_global now + hfs_lock_mount(hfsmp); + hfsmp->hfs_freeze_state = HFS_FREEZING; + hfsmp->hfs_freezing_thread = current_thread(); + hfs_unlock_mount(hfsmp); + + /* Take the exclusive lock to flush out anything else that + might have the global lock at the moment and also so we + can flush the journal. */ + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO); + hfs_unlock_global(hfsmp); + + // don't need to iterate on all vnodes, we just need to + // wait for writes to the system files and the device vnode + // + // Now that journal flush waits for all metadata blocks to + // be written out, waiting for btree writes is probably no + // longer required. + if (HFSTOVCB(hfsmp)->extentsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3"); + if (HFSTOVCB(hfsmp)->catalogRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4"); + if (HFSTOVCB(hfsmp)->allocationsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5"); + if (hfsmp->hfs_attribute_vp) + vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6"); + vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7"); + + // We're done, mark frozen + hfs_lock_mount(hfsmp); + hfsmp->hfs_freeze_state = HFS_FROZEN; + hfsmp->hfs_freezing_proc = current_proc(); + hfs_unlock_mount(hfsmp); + + return 0; +} + +__private_extern__ +int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process) +{ + hfs_lock_mount(hfsmp); + + if (hfsmp->hfs_freeze_state != HFS_FROZEN) { + hfs_unlock_mount(hfsmp); + return EINVAL; + } + if (process && hfsmp->hfs_freezing_proc != process) { + hfs_unlock_mount(hfsmp); + return EPERM; + } + + hfs_thaw_locked(hfsmp); + + hfs_unlock_mount(hfsmp); + + return 0; +} + +static void hfs_thaw_locked(struct hfsmount *hfsmp) +{ + hfsmp->hfs_freezing_proc = NULL; + hfsmp->hfs_freeze_state = HFS_THAWED; + + wakeup(&hfsmp->hfs_freeze_state); +}