X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/39236c6e673c41db228275375ab7fdb0f837b292..2dced7af2b695f87fe26496a3e73c219b7880cbc:/bsd/hfs/hfs_vfsutils.c diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index f67adacc4..1015fbd91 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -49,16 +49,13 @@ #include #include #include +#include #include /* for parsing boot-args */ #include -#if CONFIG_PROTECT -#include -#endif - #include "hfs.h" #include "hfs_catalog.h" #include "hfs_dbg.h" @@ -66,15 +63,20 @@ #include "hfs_endian.h" #include "hfs_cnode.h" #include "hfs_fsctl.h" +#include "hfs_cprotect.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/HFSUnicodeWrappers.h" +/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */ +extern int hfs_resize_debug; + static void ReleaseMetaFileVNode(struct vnode *vp); static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args); static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); +static void hfs_thaw_locked(struct hfsmount *hfsmp); #define HFS_MOUNT_DEBUG 1 @@ -165,8 +167,12 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); vcb->vcbVBMIOSize = kHFSBlockSize; - hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, + /* Generate the partition-based AVH location */ + hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + /* HFS standard is read-only, so just stuff the FS location in here, too */ + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; bzero(&cndesc, sizeof(cndesc)); cndesc.cd_parentcnid = kHFSRootParentID; @@ -311,6 +317,62 @@ MtVolErr: #endif +//******************************************************************************* +// +// Sanity check Volume Header Block: +// Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has +// not been endian-swapped and represents the on-disk contents of this sector. +// This routine will not change the endianness of vhp block. 
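+// A caller sketch (illustrative only; the buffer handling shown here is
+// an assumption, not part of this change):
+//
+//     HFSPlusVolumeHeader *vhp = (HFSPlusVolumeHeader *)(void *)buf_dataptr(bp);
+//     if (hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp) != 0)
+//         return EINVAL;   /* unknown signature, bad version or block size */
+//     /* only now is it safe to byte-swap individual fields */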
+// +//******************************************************************************* +OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp) +{ + u_int16_t signature; + u_int16_t hfs_version; + u_int32_t blockSize; + + signature = SWAP_BE16(vhp->signature); + hfs_version = SWAP_BE16(vhp->version); + + if (signature == kHFSPlusSigWord) { + if (hfs_version != kHFSPlusVersion) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version); + return (EINVAL); + } + } else if (signature == kHFSXSigWord) { + if (hfs_version != kHFSXVersion) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version); + return (EINVAL); + } + } else { + /* Removed printf for invalid HFS+ signature because it gives + * false error for UFS root volume + */ + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature); + } + return (EINVAL); + } + + /* Block size must be at least 512 and a power of 2 */ + blockSize = SWAP_BE32(vhp->blockSize); + if (blockSize < 512 || !powerof2(blockSize)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize); + } + return (EINVAL); + } + + if (blockSize < hfsmp->hfs_logical_block_size) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", + blockSize, hfsmp->hfs_logical_block_size); + } + return (EINVAL); + } + return 0; +} + //******************************************************************************* // Routine: hfs_MountHFSPlusVolume // @@ -339,38 +401,17 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, signature = SWAP_BE16(vhp->signature); hfs_version = SWAP_BE16(vhp->version); - if (signature == kHFSPlusSigWord) { - if (hfs_version != kHFSPlusVersion) { - printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version); - return (EINVAL); - } - } else if (signature == kHFSXSigWord) { - if (hfs_version != kHFSXVersion) { - printf("hfs_mount: invalid HFSX version: %x\n", hfs_version); - return (EINVAL); - } + retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp); + if (retval) + return retval; + + if (signature == kHFSXSigWord) { /* The in-memory signature is always 'H+'. */ signature = kHFSPlusSigWord; hfsmp->hfs_flags |= HFS_X; - } else { - /* Removed printf for invalid HFS+ signature because it gives - * false error for UFS root volume - */ - if (HFS_MOUNT_DEBUG) { - printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature); - } - return (EINVAL); } - /* Block size must be at least 512 and a power of 2 */ blockSize = SWAP_BE32(vhp->blockSize); - if (blockSize < 512 || !powerof2(blockSize)) { - if (HFS_MOUNT_DEBUG) { - printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize); - } - return (EINVAL); - } - /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */ if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL && (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) { @@ -382,22 +423,32 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* Make sure we can live with the physical block size. 
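	 * For instance (illustrative numbers): with a 4096-byte logical
	 * block size, a disksize of 1049088 bytes leaves
	 * (disksize & 4095) == 512, so the check below rejects the
	 * volume with ENXIO rather than risk short I/O at the tail.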
*/ if ((disksize & (hfsmp->hfs_logical_block_size - 1)) || - (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) || - (blockSize < hfsmp->hfs_logical_block_size)) { + (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) { if (HFS_MOUNT_DEBUG) { - printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", - blockSize, hfsmp->hfs_logical_block_size); + printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n", + hfsmp->hfs_logical_block_size); } return (ENXIO); } - /* If allocation block size is less than the physical - * block size, we assume that the physical block size - * is same as logical block size. The physical block - * size value is used to round down the offsets for - * reading and writing the primary and alternate volume - * headers at physical block boundary and will cause - * problems if it is less than the block size. + /* + * If allocation block size is less than the physical block size, + * same data could be cached in two places and leads to corruption. + * + * HFS Plus reserves one allocation block for the Volume Header. + * If the physical size is larger, then when we read the volume header, + * we will also end up reading in the next allocation block(s). + * If those other allocation block(s) is/are modified, and then the volume + * header is modified, the write of the volume header's buffer will write + * out the old contents of the other allocation blocks. + * + * We assume that the physical block size is same as logical block size. + * The physical block size value is used to round down the offsets for + * reading and writing the primary and alternate volume headers. + * + * The same logic to ensure good hfs_physical_block_size is also in + * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and + * later are doing the I/Os using same block size. */ if (blockSize < hfsmp->hfs_physical_block_size) { hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size; @@ -452,17 +503,73 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* * Validate and initialize the location of the alternate volume header. + * + * Note that there may be spare sectors beyond the end of the filesystem that still + * belong to our partition. */ + spare_sectors = hfsmp->hfs_logical_block_count - (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size); + /* + * Differentiate between "innocuous" spare sectors and the more unusual + * degenerate case: + * + * *** Innocuous spare sectors exist if: + * + * A) the number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and allocation block size) + * + * and + * + * B) the remainder is less than the size of a full allocation block's worth of bytes. + * + * This handles the normal case where there may be a few extra sectors, but the two + * are fundamentally in sync. + * + * *** Degenerate spare sectors exist if: + * A) The number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and block size). + * + * and + * + * B) the remainder is greater than a full allocation's block worth of bytes. + * In this case, a smaller file system exists in a larger partition. + * This can happen in various ways, including when volume is resized but the + * partition is yet to be resized. 
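+	 * (Illustrative numbers, ignoring any embedded offset: a partition of
+	 * 1,000,000 512-byte sectors holding a filesystem of only 999,000
+	 * sectors' worth of allocation blocks leaves 1,000 spare sectors,
+	 * more than one 4 KB allocation block, so the AVH is kept both at
+	 * sector 999,998, 1024 bytes before the end of the partition, and at
+	 * sector 998,998, 1024 bytes before the end of the filesystem.)
+	 *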
Under this condition, we have to assume that + * a partition management software may resize the partition to match + * the file system size in the future. Therefore we should update + * alternate volume header at two locations on the disk, + * a. 1024 bytes before end of the partition + * b. 1024 bytes before end of the file system + */ + if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) { - hfsmp->hfs_alt_id_sector = 0; /* partition has grown! */ - } else { - hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + /* + * Handle the degenerate case above. FS < partition size. + * AVH located at 1024 bytes from the end of the partition + */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + /* AVH located at 1024 bytes from the end of the filesystem */ + hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, - hfsmp->hfs_logical_block_count); + (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size)); + } + else { + /* Innocuous spare sectors; Partition & FS notion are in sync */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; + } + if (hfs_resize_debug) { + printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); } bzero(&cndesc, sizeof(cndesc)); @@ -500,6 +607,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } goto ErrorExit; } + hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); hfs_unlock(hfsmp->hfs_extents_cp); @@ -690,13 +798,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, volname_length = strlen ((const char*)vcb->vcbVN); cat_releasedesc(&cndesc); -#define DKIOCCSSETLVNAME _IOW('d', 198, char[256]) - - /* Send the volume name down to CoreStorage if necessary */ retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED); if (retval == 0) { - (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); + (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); } /* reset retval == 0. 
we don't care about errors in volname conversion */ @@ -716,23 +821,19 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, hfs_lock_mount (hfsmp); kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner); - /* Wait until it registers that it's got the appropriate locks */ - while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) { - (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0); - if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) { - break; - } - else { - hfs_lock_mount (hfsmp); - } + /* Wait until it registers that it's got the appropriate locks (or that it is finished) */ + while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) { + msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0); } + hfs_unlock_mount(hfsmp); + thread_deallocate (allocator_scanner); /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - hfs_flushvolumeheader(hfsmp, TRUE, 0); + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); } /* kHFSHasFolderCount is only supported/updated on HFSX volumes */ @@ -837,6 +938,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, MarkVCBDirty( vcb ); // mark VCB dirty so it will be written } + if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) { + hfs_pin_fs_metadata(hfsmp); + } /* * Distinguish 3 potential cases involving content protection: * 1. mount point bit set; vcbAtrb does not support it. Fail. @@ -865,16 +969,8 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, #if CONFIG_PROTECT /* Get the EAs as needed. */ int cperr = 0; - uint16_t majorversion; - uint16_t minorversion; - struct cp_root_xattr *xattr = NULL; MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); - if (xattr == NULL) { - retval = ENOMEM; - goto ErrorExit; - } - bzero (xattr, sizeof(struct cp_root_xattr)); /* go get the EA to get the version information */ cperr = cp_getrootxattr (hfsmp, xattr); @@ -886,44 +982,54 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (cperr == 0) { /* Have to run a valid CP version. */ - if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) { + if (!cp_is_supported_version(xattr->major_version)) { cperr = EINVAL; } } else if (cperr == ENOATTR) { - printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS); + printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS); bzero(xattr, sizeof(struct cp_root_xattr)); - xattr->major_version = CP_NEW_MAJOR_VERS; + xattr->major_version = CP_CURRENT_VERS; xattr->minor_version = CP_MINOR_VERS; - xattr->flags = 0; cperr = cp_setrootxattr (hfsmp, xattr); } - majorversion = xattr->major_version; - minorversion = xattr->minor_version; - if (xattr) { + + if (cperr) { FREE(xattr, M_TEMP); + retval = EPERM; + goto ErrorExit; } - /* Recheck for good status */ - if (cperr == 0) { - /* If we got here, then the CP version is valid. Set it in the mount point */ - hfsmp->hfs_running_cp_major_vers = majorversion; - printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion); + /* If we got here, then the CP version is valid. 
Set it in the mount point */
+	hfsmp->hfs_running_cp_major_vers = xattr->major_version;
+	printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
+	hfsmp->cproot_flags = xattr->flags;
+	hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
 
-	/*
-	 * Acquire the boot-arg for the AKS default key.
-	 * Ensure that the boot-arg's value is valid for FILES (not directories),
-	 * since only files are actually protected for now.
-	 */
-	PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
-	if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
-		hfsmp->default_cp_class = PROTECTION_CLASS_D;
-	}
+	FREE(xattr, M_TEMP);
+
+	/*
+	 * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
+	 * Ensure that the boot-arg's value is valid for FILES (not directories),
+	 * since only files are actually protected for now.
+	 */
+
+	PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
+
+	if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
+		PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
 	}
-	else {
-		retval = EPERM;
-		goto ErrorExit;
+
+#if HFS_TMPDBG
+#if !SECURE_KERNEL
+	PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
+#endif
+#endif
+
+	if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
+		hfsmp->default_cp_class = PROTECTION_CLASS_C;
 	}
+
 #else
 	/* If CONFIG_PROTECT not built, ignore CP */
 	vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
@@ -974,8 +1080,30 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	/*
 	 * Allow hot file clustering if conditions allow.
 	 */
-	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
-	    ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
+	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
+	    ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
+		//
+		// Wait until the bitmap scan completes before we initialize the
+		// hotfile area so that we do not run into any issues with the
+		// bitmap being read while the hotfile code is initializing itself.
+		// On some older/slower machines, without this interlock, the
+		// bitmap would sometimes get corrupted at boot time.
+		//
+		hfs_lock_mount(hfsmp);
+		while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
+			(void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
+		}
+		hfs_unlock_mount(hfsmp);
+
+		/*
+		 * Note: at this point we are not allowed to fail the
+		 *       mount operation because the HotFile init code
+		 *       in hfs_recording_init() will lookup vnodes with
+		 *       VNOP_LOOKUP() which hangs vnodes off the mount
+		 *       (and if we were to fail, VFS is not prepared to
+		 *       clean that up at this point).  Since HotFiles are
+		 *       optional, this is not a big deal.
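+		 *
+		 * For reference, the waker side of the scan_var interlock
+		 * above lives in hfs_scan_blocks() (not part of this hunk);
+		 * it is assumed to do, in essence:
+		 *
+		 *     hfs_lock_mount(hfsmp);
+		 *     hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
+		 *     wakeup((caddr_t)&hfsmp->scan_var);
+		 *     hfs_unlock_mount(hfsmp);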
+ */ (void) hfs_recording_init(hfsmp); } @@ -1000,6 +1128,53 @@ ErrorExit: return (retval); } +static int +_pin_metafile(struct hfsmount *hfsmp, vnode_t vp) +{ + int err; + + err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (err == 0) { + err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL, vfs_context_kernel()); + hfs_unlock(VTOC(vp)); + } + + return err; +} + +void +hfs_pin_fs_metadata(struct hfsmount *hfsmp) +{ + ExtendedVCB *vcb; + int err; + + vcb = HFSTOVCB(hfsmp); + + err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp); + if (err != 0) { + printf("hfs: failed to pin extents overflow file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp); + if (err != 0) { + printf("hfs: failed to pin catalog file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp); + if (err != 0) { + printf("hfs: failed to pin bitmap file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp); + if (err != 0) { + printf("hfs: failed to pin extended attr file %d\n", err); + } + + hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1, vfs_context_kernel()); + hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1, vfs_context_kernel()); + + if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) { + // and hey, if we've got a journal, let's pin that too! + hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize), vfs_context_kernel()); + } +} /* * ReleaseMetaFileVNode @@ -1085,8 +1260,7 @@ hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) * zero - overflow extents do not exist */ __private_extern__ -int -overflow_extents(struct filefork *fp) +bool overflow_extents(struct filefork *fp) { u_int32_t blocks; @@ -1096,29 +1270,42 @@ overflow_extents(struct filefork *fp) // and therefore it has to be an HFS+ volume. Otherwise // we check through the volume header to see what type // of volume we're on. 
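	// For instance, a fork with ff_blocks == 1000 whose eight inline
	// extent records only sum to 900 blocks must keep the remaining
	// 100 blocks in the extents overflow B-tree, so this returns true.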
- // - if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) { - if (fp->ff_extents[7].blockCount == 0) - return (0); + // - blocks = fp->ff_extents[0].blockCount + - fp->ff_extents[1].blockCount + - fp->ff_extents[2].blockCount + - fp->ff_extents[3].blockCount + - fp->ff_extents[4].blockCount + - fp->ff_extents[5].blockCount + - fp->ff_extents[6].blockCount + - fp->ff_extents[7].blockCount; - } else { +#if CONFIG_HFS_STD + if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) { if (fp->ff_extents[2].blockCount == 0) return false; - + blocks = fp->ff_extents[0].blockCount + - fp->ff_extents[1].blockCount + - fp->ff_extents[2].blockCount; - } + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount; + + return fp->ff_blocks > blocks; + } +#endif + + if (fp->ff_extents[7].blockCount == 0) + return false; + + blocks = fp->ff_extents[0].blockCount + + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount + + fp->ff_extents[3].blockCount + + fp->ff_extents[4].blockCount + + fp->ff_extents[5].blockCount + + fp->ff_extents[6].blockCount + + fp->ff_extents[7].blockCount; + + return fp->ff_blocks > blocks; +} - return (fp->ff_blocks > blocks); +static __attribute__((pure)) +boolean_t hfs_is_frozen(struct hfsmount *hfsmp) +{ + return (hfsmp->hfs_freeze_state == HFS_FROZEN + || (hfsmp->hfs_freeze_state == HFS_FREEZING + && current_thread() != hfsmp->hfs_freezing_thread)); } /* @@ -1127,23 +1314,62 @@ overflow_extents(struct filefork *fp) int hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) { - void *thread = current_thread(); + thread_t thread = current_thread(); if (hfsmp->hfs_global_lockowner == thread) { panic ("hfs_lock_global: locking against myself!"); } - /* HFS_SHARED_LOCK */ + /* + * This check isn't really necessary but this stops us taking + * the mount lock in most cases. The essential check is below. + */ + if (hfs_is_frozen(hfsmp)) { + /* + * Unfortunately, there is no easy way of getting a notification + * for when a process is exiting and it's possible for the exiting + * process to get blocked somewhere else. To catch this, we + * periodically monitor the frozen process here and thaw if + * we spot that it's exiting. + */ +frozen: + hfs_lock_mount(hfsmp); + + struct timespec ts = { 0, 500 * NSEC_PER_MSEC }; + + while (hfs_is_frozen(hfsmp)) { + if (hfsmp->hfs_freeze_state == HFS_FROZEN + && proc_exiting(hfsmp->hfs_freezing_proc)) { + hfs_thaw_locked(hfsmp); + break; + } + + msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT, "hfs_lock_global (frozen)", &ts); + } + hfs_unlock_mount(hfsmp); + } + + /* HFS_SHARED_LOCK */ if (locktype == HFS_SHARED_LOCK) { lck_rw_lock_shared (&hfsmp->hfs_global_lock); hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER; } - /* HFS_EXCLUSIVE_LOCK */ + /* HFS_EXCLUSIVE_LOCK */ else { lck_rw_lock_exclusive (&hfsmp->hfs_global_lock); hfsmp->hfs_global_lockowner = thread; } + /* + * We have to check if we're frozen again because of the time + * between when we checked and when we took the global lock. 
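+	 * An interleaving this re-check catches (illustrative):
+	 *
+	 *   thread A: hfs_is_frozen() returns false above
+	 *   thread B: freezes the volume and releases the global lock
+	 *   thread A: acquires the global lock
+	 *
+	 * Without the re-check, thread A would proceed against a frozen
+	 * volume; with it, we drop the lock and go back to waiting.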
+ */ + if (hfs_is_frozen(hfsmp)) { + hfs_unlock_global(hfsmp); + goto frozen; + } + return 0; } @@ -1153,16 +1379,15 @@ hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) */ void hfs_unlock_global (struct hfsmount *hfsmp) -{ - - void *thread = current_thread(); +{ + thread_t thread = current_thread(); - /* HFS_LOCK_EXCLUSIVE */ + /* HFS_LOCK_EXCLUSIVE */ if (hfsmp->hfs_global_lockowner == thread) { hfsmp->hfs_global_lockowner = NULL; lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock); } - /* HFS_LOCK_SHARED */ + /* HFS_LOCK_SHARED */ else { lck_rw_unlock_shared (&hfsmp->hfs_global_lock); } @@ -1190,6 +1415,19 @@ void hfs_unlock_mount (struct hfsmount *hfsmp) { /* * Lock HFS system file(s). + * + * This function accepts a @flags parameter which indicates which + * system file locks are required. The value it returns should be + * used in a subsequent call to hfs_systemfile_unlock. The caller + * should treat this value as opaque; it may or may not have a + * relation to the @flags field that is passed in. The *only* + * guarantee that we make is that a value of zero means that no locks + * were taken and that there is no need to call hfs_systemfile_unlock + * (although it is harmless to do so). Recursion is supported but + * care must still be taken to ensure correct lock ordering. Note + * that requests for certain locks may cause other locks to also be + * taken, including locks that are not possible to ask for via the + * @flags parameter. */ int hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype) @@ -1198,19 +1436,20 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file */ if (flags & SFL_CATALOG) { + if (hfsmp->hfs_catalog_cp + && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) { #ifdef HFS_CHECK_LOCK_ORDER - if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); - } - if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)"); - } - if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)"); - } + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); + } + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)"); + } + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)"); + } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->hfs_catalog_cp) { (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT); /* * When the catalog file has overflow extents then @@ -1228,16 +1467,17 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp } if (flags & SFL_ATTRIBUTE) { + if (hfsmp->hfs_attribute_cp + && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) { #ifdef HFS_CHECK_LOCK_ORDER - if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); - } - if 
(hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)"); - } + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)"); + } #endif /* HFS_CHECK_LOCK_ORDER */ - - if (hfsmp->hfs_attribute_cp) { + (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT); /* * When the attribute file has overflow extents then @@ -1255,13 +1495,14 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp } if (flags & SFL_STARTUP) { + if (hfsmp->hfs_startup_cp + && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) { #ifdef HFS_CHECK_LOCK_ORDER - if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { - panic("hfs_systemfile_lock: bad lock order (Extents before Startup)"); - } + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Startup)"); + } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->hfs_startup_cp) { (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT); /* * When the startup file has overflow extents then @@ -1303,6 +1544,24 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp */ if (hfsmp->hfs_extents_cp) { (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) { + /* + * because we may need this lock on the pageout path (if a swapfile allocation + * spills into the extents overflow tree), we will grant the holder of this + * lock the privilege of dipping into the reserve free pool in order to prevent + * a deadlock from occurring if we need those pageouts to complete before we + * will make any new pages available on the free list... 
the deadlock can occur + * if this thread needs to allocate memory while this lock is held + */ + if (set_vm_privilege(TRUE) == FALSE) { + /* + * indicate that we need to drop vm_privilege + * when we unlock + */ + flags |= SFL_VM_PRIV; + } + } } else { flags &= ~SFL_EXTENTS; } @@ -1317,6 +1576,9 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktyp void hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) { + if (!flags) + return; + struct timeval tv; u_int32_t lastfsync; int numOfLockedBuffs; @@ -1366,6 +1628,14 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) } } hfs_unlock(hfsmp->hfs_extents_cp); + + if (flags & SFL_VM_PRIV) { + /* + * revoke the vm_privilege we granted this thread + * now that we have unlocked the overflow extents + */ + set_vm_privilege(FALSE); + } } } @@ -1387,7 +1657,7 @@ void RequireFileLock(FileReference vp, int shareable) shareable = 0; } - locked = VTOC(vp)->c_lockowner == (void *)current_thread(); + locked = VTOC(vp)->c_lockowner == current_thread(); if (!locked && !shareable) { switch (VTOC(vp)->c_fileid) { @@ -1540,7 +1810,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp) cat_cookie_t cookie; int catlock = 0; int catreserve = 0; - int started_tr = 0; + bool started_tr = false; int lockflags; int result; int orphaned_files = 0; @@ -1599,159 +1869,177 @@ hfs_remove_orphans(struct hfsmount * hfsmp) * where xxx is the file's cnid in decimal. * */ - if (bcmp(tempname, filename, namelen) == 0) { - struct filefork dfork; - struct filefork rfork; - struct cnode cnode; - int mode = 0; - - bzero(&dfork, sizeof(dfork)); - bzero(&rfork, sizeof(rfork)); - bzero(&cnode, sizeof(cnode)); - - /* Delete any attributes, ignore errors */ - (void) hfs_removeallattr(hfsmp, filerec.fileID); - - if (hfs_start_transaction(hfsmp) != 0) { - printf("hfs_remove_orphans: failed to start transaction\n"); - goto exit; - } - started_tr = 1; - - /* - * Reserve some space in the Catalog file. - */ - if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { - printf("hfs_remove_orphans: cat_preflight failed\n"); - goto exit; - } - catreserve = 1; + if (bcmp(tempname, filename, namelen) != 0) + continue; - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - catlock = 1; + struct filefork dfork; + struct filefork rfork; + struct cnode cnode; + int mode = 0; - /* Build a fake cnode */ - cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr, - &dfork.ff_data, &rfork.ff_data); - cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; - cnode.c_desc.cd_nameptr = (const u_int8_t *)filename; - cnode.c_desc.cd_namelen = namelen; - cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid; - cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks; - - /* Position iterator at previous entry */ - if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, - NULL, NULL) != 0) { - break; - } + bzero(&dfork, sizeof(dfork)); + bzero(&rfork, sizeof(rfork)); + bzero(&cnode, sizeof(cnode)); + + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; + + /* + * Reserve some space in the Catalog file. 
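+			 * (cat_preflight presumably guarantees the delete cannot
+			 * fail mid-transaction for lack of space: even a deletion
+			 * can grow the catalog B-tree transiently through node
+			 * splits.)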
+ */ + if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { + printf("hfs_remove_orphans: cat_preflight failed\n"); + goto exit; + } + catreserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + + /* Build a fake cnode */ + cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr, + &dfork.ff_data, &rfork.ff_data); + cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + cnode.c_desc.cd_nameptr = (const u_int8_t *)filename; + cnode.c_desc.cd_namelen = namelen; + cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid; + cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks; + + /* Position iterator at previous entry */ + if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, + NULL, NULL) != 0) { + break; + } - /* Truncate the file to zero (both forks) */ - if (dfork.ff_blocks > 0) { - u_int64_t fsize; + /* Truncate the file to zero (both forks) */ + if (dfork.ff_blocks > 0) { + u_int64_t fsize; - dfork.ff_cp = &cnode; - cnode.c_datafork = &dfork; - cnode.c_rsrcfork = NULL; - fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; - while (fsize > 0) { - if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) { - fsize -= HFS_BIGFILE_SIZE; - } else { - fsize = 0; - } - - if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, - cnode.c_attr.ca_fileid, false) != 0) { - printf("hfs: error truncating data fork!\n"); - break; - } - - // - // if we're iteratively truncating this file down, - // then end the transaction and start a new one so - // that no one transaction gets too big. - // - if (fsize > 0 && started_tr) { - /* Drop system file locks before starting - * another transaction to preserve lock order. - */ - hfs_systemfile_unlock(hfsmp, lockflags); - catlock = 0; - hfs_end_transaction(hfsmp); - - if (hfs_start_transaction(hfsmp) != 0) { - started_tr = 0; - break; - } - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - catlock = 1; - } + dfork.ff_cp = &cnode; + cnode.c_datafork = &dfork; + cnode.c_rsrcfork = NULL; + fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; + while (fsize > 0) { + if (fsize > HFS_BIGFILE_SIZE) { + fsize -= HFS_BIGFILE_SIZE; + } else { + fsize = 0; } - } - if (rfork.ff_blocks > 0) { - rfork.ff_cp = &cnode; - cnode.c_datafork = NULL; - cnode.c_rsrcfork = &rfork; - if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { - printf("hfs: error truncating rsrc fork!\n"); + if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, + cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating data fork!\n"); break; } + + // + // if we're iteratively truncating this file down, + // then end the transaction and start a new one so + // that no one transaction gets too big. + // + if (fsize > 0) { + /* Drop system file locks before starting + * another transaction to preserve lock order. 
+ */ + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = 0; + hfs_end_transaction(hfsmp); + + if (hfs_start_transaction(hfsmp) != 0) { + started_tr = false; + goto exit; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } } + } - /* Remove the file or folder record from the Catalog */ - if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { - printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); - hfs_systemfile_unlock(hfsmp, lockflags); - catlock = 0; - hfs_volupdate(hfsmp, VOL_UPDATE, 0); + if (rfork.ff_blocks > 0) { + rfork.ff_cp = &cnode; + cnode.c_datafork = NULL; + cnode.c_rsrcfork = &rfork; + if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating rsrc fork!\n"); break; } - - mode = cnode.c_attr.ca_mode & S_IFMT; + } - if (mode == S_IFDIR) { - orphaned_dirs++; - } - else { - orphaned_files++; - } + // Deal with extended attributes + if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) { + // hfs_removeallattr uses its own transactions + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = false; + hfs_end_transaction(hfsmp); - /* Update parent and volume counts */ - hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; - if (mode == S_IFDIR) { - DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr); + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; } - (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], - &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } - /* Drop locks and end the transaction */ + /* Remove the file or folder record from the Catalog */ + if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { + printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); hfs_systemfile_unlock(hfsmp, lockflags); - cat_postflight(hfsmp, &cookie, p); - catlock = catreserve = 0; + catlock = 0; + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + break; + } - /* - Now that Catalog is unlocked, update the volume info, making - sure to differentiate between files and directories - */ - if (mode == S_IFDIR) { - hfs_volupdate(hfsmp, VOL_RMDIR, 0); - } - else{ - hfs_volupdate(hfsmp, VOL_RMFILE, 0); - } + mode = cnode.c_attr.ca_mode & S_IFMT; - if (started_tr) { - hfs_end_transaction(hfsmp); - started_tr = 0; - } + if (mode == S_IFDIR) { + orphaned_dirs++; + } + else { + orphaned_files++; + } + + /* Update parent and volume counts */ + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (mode == S_IFDIR) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); - } /* end if */ + /* Drop locks and end the transaction */ + hfs_systemfile_unlock(hfsmp, lockflags); + cat_postflight(hfsmp, &cookie, p); + catlock = catreserve = 0; + + /* + Now that Catalog is unlocked, update the volume info, making + sure to differentiate between files and directories + */ + if (mode == S_IFDIR) { + hfs_volupdate(hfsmp, VOL_RMDIR, 0); + } + else{ + hfs_volupdate(hfsmp, VOL_RMFILE, 0); + } + + 
hfs_end_transaction(hfsmp); + started_tr = false; } /* end for */ + +exit: + if (orphaned_files > 0 || orphaned_dirs > 0) printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs); -exit: if (catlock) { hfs_systemfile_unlock(hfsmp, lockflags); } @@ -1808,6 +2096,81 @@ u_int32_t logBlockSize; return logBlockSize; } +#if HFS_SPARSE_DEV +static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks) +{ + struct vfsstatfs *vfsp; /* 272 bytes */ + uint64_t vfreeblks; + struct timeval now; + + hfs_lock_mount(hfsmp); + + vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp; + if (!backing_vp) { + hfs_unlock_mount(hfsmp); + return false; + } + + // usecount is not enough; we need iocount + if (vnode_get(backing_vp)) { + hfs_unlock_mount(hfsmp); + *pfree_blks = 0; + return true; + } + + uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; + uint32_t bandblks = hfsmp->hfs_sparsebandblks; + uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks; + + hfs_unlock_mount(hfsmp); + + mount_t backingfs_mp = vnode_mount(backing_vp); + + microtime(&now); + if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) { + vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT); + hfsmp->hfs_last_backingstatfs = now.tv_sec; + } + + if (!(vfsp = vfs_statfs(backingfs_mp))) { + vnode_put(backing_vp); + return false; + } + + vfreeblks = vfsp->f_bavail; + /* Normalize block count if needed. */ + if (vfsp->f_bsize != hfsmp->blockSize) + vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize; + if (vfreeblks > bandblks) + vfreeblks -= bandblks; + else + vfreeblks = 0; + + /* + * Take into account any delayed allocations. It is not + * certain what the original reason for the "2 *" is. Most + * likely it is to allow for additional requirements in the + * host file system and metadata required by disk images. The + * number of loaned blocks is likely to be small and we will + * stop using them as we get close to the limit. + */ + loanedblks = 2 * loanedblks; + if (vfreeblks > loanedblks) + vfreeblks -= loanedblks; + else + vfreeblks = 0; + + if (maxblks) + vfreeblks = MIN(vfreeblks, maxblks); + + vnode_put(backing_vp); + + *pfree_blks = vfreeblks; + + return true; +} +#endif + u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) { @@ -1823,7 +2186,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) */ freeblks = hfsmp->freeBlocks; rsrvblks = hfsmp->reserveBlocks; - loanblks = hfsmp->loanedBlocks; + loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; if (wantreserve) { if (freeblks > rsrvblks) freeblks -= rsrvblks; @@ -1840,61 +2203,10 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) * When the underlying device is sparse, check the * available space on the backing store volume. */ - if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { - struct vfsstatfs *vfsp; /* 272 bytes */ - u_int64_t vfreeblks; - u_int32_t loanedblks; - struct mount * backingfs_mp; - struct timeval now; - - backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp); - - microtime(&now); - if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) { - vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT); - hfsmp->hfs_last_backingstatfs = now.tv_sec; - } - - if ((vfsp = vfs_statfs(backingfs_mp))) { - hfs_lock_mount (hfsmp); - vfreeblks = vfsp->f_bavail; - /* Normalize block count if needed. 
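+	 * (e.g. a backing store with f_bsize 512 under an HFS volume with
+	 * 4096-byte allocation blocks: 80,000 free backing-store blocks
+	 * scale down to 10,000 allocation blocks.)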
*/ - if (vfsp->f_bsize != hfsmp->blockSize) { - vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize; - } - if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks) - vfreeblks -= hfsmp->hfs_sparsebandblks; - else - vfreeblks = 0; - - /* Take into account any delayed allocations. */ - loanedblks = 2 * hfsmp->loanedBlocks; - if (vfreeblks > loanedblks) - vfreeblks -= loanedblks; - else - vfreeblks = 0; - - if (hfsmp->hfs_backingfs_maxblocks) { - vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks); - } - freeblks = MIN(vfreeblks, freeblks); - hfs_unlock_mount (hfsmp); - } - } + uint64_t vfreeblks; + if (hfs_get_backing_free_blks(hfsmp, &vfreeblks)) + freeblks = MIN(freeblks, vfreeblks); #endif /* HFS_SPARSE_DEV */ - if (hfsmp->hfs_flags & HFS_CS) { - uint64_t cs_free_bytes; - uint64_t cs_free_blks; - if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES, - (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) { - cs_free_blks = cs_free_bytes / hfsmp->blockSize; - if (cs_free_blks > loanblks) - cs_free_blks -= loanblks; - else - cs_free_blks = 0; - freeblks = MIN(cs_free_blks, freeblks); - } - } return (freeblks); } @@ -2814,7 +3126,7 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) * Add the existing size of the Extents Overflow B-tree. * (It rarely grows, so don't bother reserving additional room for it.) */ - zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize; + zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize); /* * If there is an Attributes B-tree, leave room for 11 clumps worth. @@ -2929,7 +3241,11 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) filesize += temp / 3; hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; - hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize); + } else { + hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + } /* Convert to allocation blocks. */ blk = zonesize / vcb->blockSize; @@ -2949,11 +3265,12 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) hfsmp->hfs_hotfile_end = 0; hfsmp->hfs_hotfile_freeblks = 0; } -#if 0 - printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); - printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); - printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks); +#if DEBUG + printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); + printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); + printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks); #endif + hfsmp->hfs_flags |= HFS_METADATA_ZONE; } @@ -2965,19 +3282,33 @@ hfs_hotfile_freeblocks(struct hfsmount *hfsmp) int lockflags; int freeblocks; + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // This is only used at initialization time and on an ssd + // we'll get the real info from the hotfile btree user + // info + // + return 0; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); freeblocks = MetaZoneFreeBlocks(vcb); hfs_systemfile_unlock(hfsmp, lockflags); /* Minus Extents overflow file reserve. 
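	 * (e.g. if hfs_overflow_maxblks were 1024 while the extents file
	 * already occupied 1040 blocks, subtracting blindly would credit
	 * free blocks that do not exist; hence the guard below.)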
*/ - freeblocks -= - hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; + if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; + } + /* Minus catalog file reserve. */ - freeblocks -= - hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; + if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; + } + if (freeblocks < 0) freeblocks = 0; + // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks)); return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks); } @@ -3008,6 +3339,50 @@ hfs_virtualmetafile(struct cnode *cp) return (0); } +__private_extern__ +void hfs_syncer_lock(struct hfsmount *hfsmp) +{ + hfs_lock_mount(hfsmp); +} + +__private_extern__ +void hfs_syncer_unlock(struct hfsmount *hfsmp) +{ + hfs_unlock_mount(hfsmp); +} + +__private_extern__ +void hfs_syncer_wait(struct hfsmount *hfsmp) +{ + msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT, + "hfs_syncer_wait", NULL); +} + +__private_extern__ +void hfs_syncer_wakeup(struct hfsmount *hfsmp) +{ + wakeup(&hfsmp->hfs_sync_incomplete); +} + +__private_extern__ +uint64_t hfs_usecs_to_deadline(uint64_t usecs) +{ + uint64_t deadline; + clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline); + return deadline; +} + +__private_extern__ +void hfs_syncer_queue(thread_call_t syncer) +{ + if (thread_call_enter_delayed_with_leeway(syncer, + NULL, + hfs_usecs_to_deadline(HFS_META_DELAY), + 0, + THREAD_CALL_DELAY_SYS_BACKGROUND)) { + printf("hfs: syncer already scheduled!\n"); + } +} // // Fire off a timed callback to sync the disk if the @@ -3017,50 +3392,36 @@ hfs_virtualmetafile(struct cnode *cp) void hfs_sync_ejectable(struct hfsmount *hfsmp) { - if (hfsmp->hfs_syncer) { - clock_sec_t secs; - clock_usec_t usecs; - uint64_t now; + // If we don't have a syncer or we get called by the syncer, just return + if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread) + return; - clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfs_syncer_lock(hfsmp); - if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) { - // if we have a sync scheduled but i/o is starting to pile up, - // don't call thread_call_enter_delayed() again because that - // will defer the sync. - return; - } + if (!timerisset(&hfsmp->hfs_sync_req_oldest)) + microuptime(&hfsmp->hfs_sync_req_oldest); - if (hfsmp->hfs_sync_scheduled == 0) { - uint64_t deadline; + /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we + don't want to queue again if there is a sync outstanding. */ + if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) { + hfs_syncer_unlock(hfsmp); + return; + } - hfsmp->hfs_last_sync_request_time = now; + hfsmp->hfs_sync_incomplete = TRUE; - clock_interval_to_deadline(HFS_META_DELAY, NSEC_PER_USEC, &deadline); + thread_call_t syncer = hfsmp->hfs_syncer; - /* - * Increment hfs_sync_scheduled on the assumption that we're the - * first thread to schedule the timer. If some other thread beat - * us, then we'll decrement it. If we *were* the first to - * schedule the timer, then we need to keep track that the - * callback is waiting to complete. 
- */ - OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); - if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline)) - OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); - else - OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); - } - } -} + hfs_syncer_unlock(hfsmp); + hfs_syncer_queue(syncer); +} int hfs_start_transaction(struct hfsmount *hfsmp) { - int ret, unlock_on_err=0; - void * thread = current_thread(); + int ret = 0, unlock_on_err = 0; + thread_t thread = current_thread(); #ifdef HFS_CHECK_LOCK_ORDER /* @@ -3080,31 +3441,67 @@ hfs_start_transaction(struct hfsmount *hfsmp) } #endif /* HFS_CHECK_LOCK_ORDER */ - if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) { - hfs_lock_global (hfsmp, HFS_SHARED_LOCK); - OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); - unlock_on_err = 1; +again: + + if (hfsmp->jnl) { + if (journal_owner(hfsmp->jnl) != thread) { + /* + * The global lock should be held shared if journal is + * active to prevent disabling. If we're not the owner + * of the journal lock, verify that we're not already + * holding the global lock exclusive before moving on. + */ + if (hfsmp->hfs_global_lockowner == thread) { + ret = EBUSY; + goto out; + } + + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + // Things could have changed + if (!hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } + } else { + // No journal + if (hfsmp->hfs_global_lockowner != thread) { + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + + // Things could have changed + if (hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } } /* If a downgrade to read-only mount is in progress, no other - * process than the downgrade process is allowed to modify + * thread than the downgrade thread is allowed to modify * the file system. */ if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) && - (hfsmp->hfs_downgrading_proc != thread)) { + hfsmp->hfs_downgrading_thread != thread) { ret = EROFS; goto out; } if (hfsmp->jnl) { ret = journal_start_transaction(hfsmp->jnl); - if (ret == 0) { - OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting); - } } else { ret = 0; } + if (ret == 0) + ++hfsmp->hfs_transaction_nesting; + out: if (ret != 0 && unlock_on_err) { hfs_unlock_global (hfsmp); @@ -3117,12 +3514,15 @@ out: int hfs_end_transaction(struct hfsmount *hfsmp) { - int need_unlock=0, ret; + int ret; - if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread() - && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) { - need_unlock = 1; - } + assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread()); + assert(hfsmp->hfs_transaction_nesting > 0); + + if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1) + hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY); + + bool need_unlock = !--hfsmp->hfs_transaction_nesting; if (hfsmp->jnl) { ret = journal_end_transaction(hfsmp->jnl); @@ -3140,49 +3540,127 @@ hfs_end_transaction(struct hfsmount *hfsmp) } -/* - * Flush the contents of the journal to the disk. - * - * Input: - * wait_for_IO - - * If TRUE, wait to write in-memory journal to the disk - * consistently, and also wait to write all asynchronous - * metadata blocks to its corresponding locations - * consistently on the disk. This means that the journal - * is empty at this point and does not contain any - * transactions. 
This is overkill in normal scenarios
- *		but is useful whenever the metadata blocks are required
- *		to be consistent on-disk instead of just the journal
- *		being consistent; like before live verification
- *		and live volume resizing.
- *
- *	If FALSE, only wait to write in-memory journal to the
- *		disk consistently.  This means that the journal still
- *		contains uncommitted transactions and the file system
- *		metadata blocks in the journal transactions might be
- *		written asynchronously to the disk.  But there is no
- *		guarantee that they are written to the disk before
- *		returning to the caller.  Note that this option is
- *		sufficient for file system data integrity as it
- *		guarantees consistent journal content on the disk.
- */
-int
-hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
+void
+hfs_journal_lock(struct hfsmount *hfsmp)
 {
-	int ret;
+	/* Only peek at hfsmp->jnl while holding the global lock */
+	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+	if (hfsmp->jnl) {
+		journal_lock(hfsmp->jnl);
+	}
+	hfs_unlock_global (hfsmp);
+}
 
+void
+hfs_journal_unlock(struct hfsmount *hfsmp)
+{
 	/* Only peek at hfsmp->jnl while holding the global lock */
 	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
 	if (hfsmp->jnl) {
-		ret = journal_flush(hfsmp->jnl, wait_for_IO);
-	} else {
-		ret = 0;
+		journal_unlock(hfsmp->jnl);
 	}
 	hfs_unlock_global (hfsmp);
-
-	return ret;
 }
 
+/*
+ * Flush the contents of the journal to the disk.
+ *
+ *  - HFS_FLUSH_JOURNAL
+ *      Wait to write in-memory journal to the disk consistently.
+ *      This means that the journal still contains uncommitted
+ *      transactions and the file system metadata blocks in
+ *      the journal transactions might be written asynchronously
+ *      to the disk.  But there is no guarantee that they are
+ *      written to the disk before returning to the caller.
+ *      Note that this option is sufficient for file system
+ *      data integrity as it guarantees consistent journal
+ *      content on the disk.
+ *
+ *  - HFS_FLUSH_JOURNAL_META
+ *      Wait to write in-memory journal to the disk
+ *      consistently, and also wait to write all asynchronous
+ *      metadata blocks to their corresponding locations
+ *      consistently on the disk.  This is overkill in normal
+ *      scenarios but is useful whenever the metadata blocks
+ *      are required to be consistent on-disk instead of
+ *      just the journal being consistent; like before live
+ *      verification and live volume resizing.  The metadata
+ *      update does not include a barrier or track cache flush.
+ *
+ *  - HFS_FLUSH_FULL
+ *      HFS_FLUSH_JOURNAL + force a track cache flush to media
+ *
+ *  - HFS_FLUSH_CACHE
+ *      Force a track cache flush to media.
+ * + * - HFS_FLUSH_BARRIER + * Barrier-only flush to ensure write order + * + */ +errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode) +{ + errno_t error = 0; + journal_flush_options_t options = 0; + dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER }; + + switch (mode) { + case HFS_FLUSH_JOURNAL_META: + // wait for journal, metadata blocks and previous async flush to finish + SET(options, JOURNAL_WAIT_FOR_IO); + + // no break + + case HFS_FLUSH_JOURNAL: + case HFS_FLUSH_JOURNAL_BARRIER: + case HFS_FLUSH_FULL: + + if (mode == HFS_FLUSH_JOURNAL_BARRIER && + !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + mode = HFS_FLUSH_FULL; + + if (mode == HFS_FLUSH_FULL) + SET(options, JOURNAL_FLUSH_FULL); + + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + if (hfsmp->jnl) + error = journal_flush(hfsmp->jnl, options); + + hfs_unlock_global (hfsmp); + + /* + * This may result in a double barrier as + * journal_flush may have issued a barrier itself + */ + if (mode == HFS_FLUSH_JOURNAL_BARRIER) + error = VNOP_IOCTL(hfsmp->hfs_devvp, + DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, vfs_context_kernel()); + + break; + + case HFS_FLUSH_CACHE: + // Do a full sync + sync_req.options = 0; + + // no break + + case HFS_FLUSH_BARRIER: + // If barrier only flush doesn't support, fall back to use full flush. + if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + sync_req.options = 0; + + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, vfs_context_kernel()); + break; + + default: + error = EINVAL; + } + + return error; +} /* * hfs_erase_unused_nodes @@ -3262,7 +3740,7 @@ extern time_t snapshot_timestamp; int check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg) { - int tracked_error = 0, snapshot_error = 0; + int snapshot_error = 0; if (vp == NULL) { return 0; @@ -3273,23 +3751,6 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a return 0; } - if (VTOC(vp)->c_bsdflags & UF_TRACKED) { - // the file has the tracked bit set, so send an event to the tracked-file handler - int error; - - // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp); - error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT); - if (error) { - if (error == EAGAIN) { - printf("hfs: tracked-file: timed out waiting for namespace handler...\n"); - - } else if (error == EINTR) { - // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n"); - tracked_error = EINTR; - } - } - } - if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) { // the change time is within this epoch int error; @@ -3307,7 +3768,6 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a } } - if (tracked_error) return tracked_error; if (snapshot_error) return snapshot_error; return 0; @@ -3354,3 +3814,222 @@ check_for_dataless_file(struct vnode *vp, uint64_t op_type) return error; } + + +// +// NOTE: this function takes care of starting a transaction and +// acquiring the systemfile lock so that it can call +// cat_update(). 
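+//
+// A caller sketch (illustrative):
+//
+//     uint32_t docid;
+//     if (hfs_generate_document_id(hfsmp, &docid) == 0) {
+//         /* stamp docid into the target cnode's extended Finder info */
+//     }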
 /*
  * hfs_erase_unused_nodes
@@ -3262,7 +3740,7 @@ extern time_t snapshot_timestamp;
 int
 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
 {
-	int tracked_error = 0, snapshot_error = 0;
+	int snapshot_error = 0;
 
 	if (vp == NULL) {
 		return 0;
@@ -3273,23 +3751,6 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a
 		return 0;
 	}
 
-	if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
-		// the file has the tracked bit set, so send an event to the tracked-file handler
-		int error;
-
-		// printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
-		error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
-		if (error) {
-			if (error == EAGAIN) {
-				printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
-
-			} else if (error == EINTR) {
-				// printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
-				tracked_error = EINTR;
-			}
-		}
-	}
-
 	if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
 		// the change time is within this epoch
 		int error;
@@ -3307,7 +3768,6 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a
 		}
 	}
 
-	if (tracked_error) return tracked_error;
 	if (snapshot_error) return snapshot_error;
 
 	return 0;
@@ -3354,3 +3814,222 @@ check_for_dataless_file(struct vnode *vp, uint64_t op_type)
 
 	return error;
 }
+
+
+//
+// NOTE: this function takes care of starting a transaction and
+//       acquiring the systemfile lock so that it can call
+//       cat_update().
+//
+// NOTE: do NOT hold any cnode locks while calling this function
+//       to avoid deadlocks (because we take a lock on the root
+//       cnode)
+//
+int
+hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
+{
+	struct vnode *rvp;
+	struct cnode *cp;
+	int error;
+
+	error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
+	if (error) {
+		return error;
+	}
+
+	cp = VTOC(rvp);
+	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
+		vnode_put(rvp);
+		return error;
+	}
+	struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
+
+	int lockflags;
+	if ((error = hfs_start_transaction(hfsmp)) != 0) {
+		hfs_unlock(cp);
+		vnode_put(rvp);
+		return error;
+	}
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+	if (extinfo->document_id == 0) {
+		// initialize this to start at 3 (one greater than the root-dir id)
+		extinfo->document_id = 3;
+	}
+
+	*docid = extinfo->document_id++;
+
+	// mark the root cnode dirty
+	cp->c_flag |= C_MODIFIED;
+	hfs_update(cp->c_vp, 0);
+
+	hfs_systemfile_unlock (hfsmp, lockflags);
+	(void) hfs_end_transaction(hfsmp);
+
+	(void) hfs_unlock(cp);
+
+	vnode_put(rvp);
+	rvp = NULL;
+
+	return 0;
+}
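A hypothetical caller, for illustration only. Per the NOTE above, no cnode locks may be held at this point because the routine locks the root cnode itself.

/*
 * Hypothetical sketch of a consumer of hfs_generate_document_id().
 */
static int
example_new_document_id(struct hfsmount *hfsmp, uint32_t *out_docid)
{
	uint32_t docid = 0;
	int error = hfs_generate_document_id(hfsmp, &docid);

	if (error)
		return error;   /* e.g. root vnode or lock acquisition failed */

	*out_docid = docid; /* IDs begin at 3 and increase monotonically */
	return 0;
}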
+
+
+/*
+ * Return information about the number of file system allocation blocks
+ * taken up by metadata on a volume.
+ *
+ * This function populates struct hfsinfo_metadata with the allocation blocks
+ * used by the extents overflow btree, catalog btree, bitmap, attribute btree,
+ * and journal file, plus the sum of all of the above.
+ */
+int
+hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
+{
+	int lockflags = 0;
+	int ret_lockflags = 0;
+
+	/* Zero out the output buffer */
+	bzero(hinfo, sizeof(struct hfsinfo_metadata));
+
+	/*
+	 * Getting the number of allocation blocks for all btrees
+	 * should be a quick operation, so we grab locks for
+	 * all of them at the same time
+	 */
+	lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
+	ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+	/*
+	 * Make sure that we were able to acquire all locks requested
+	 * to protect us against conditions like unmount in progress.
+	 */
+	if ((lockflags & ret_lockflags) != lockflags) {
+		/* Release any locks that were acquired */
+		hfs_systemfile_unlock(hfsmp, ret_lockflags);
+		return EPERM;
+	}
+
+	/* Get information about all the btrees */
+	hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
+	hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
+	hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
+	hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
+
+	/* Done with btrees, give up the locks */
+	hfs_systemfile_unlock(hfsmp, ret_lockflags);
+
+	/* Get information about journal file */
+	hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
+
+	/* Calculate total number of metadata blocks */
+	hinfo->total = hinfo->extents + hinfo->catalog +
+	               hinfo->allocation + hinfo->attribute +
+	               hinfo->journal;
+
+	return 0;
+}
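A hedged sketch of how the populated hfsinfo_metadata might be used, e.g. to report metadata overhead as a percentage of the volume; the helper is invented.

/*
 * Hypothetical sketch: metadata overhead from hfs_getinfo_metadata_blocks().
 */
static int
example_metadata_overhead(struct hfsmount *hfsmp, uint32_t *percent)
{
	struct hfsinfo_metadata hinfo;
	int error = hfs_getinfo_metadata_blocks(hfsmp, &hinfo);

	if (error)
		return error;   /* EPERM if the system-file locks were unavailable */

	/* totalBlocks is the volume's allocation block count */
	*percent = (uint32_t)(((uint64_t)hinfo.total * 100) /
	                      HFSTOVCB(hfsmp)->totalBlocks);
	return 0;
}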
+
+static int
+hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
+{
+	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
+
+	return 0;
+}
+
+__private_extern__
+int hfs_freeze(struct hfsmount *hfsmp)
+{
+	// First make sure some other process isn't freezing
+	hfs_lock_mount(hfsmp);
+	while (hfsmp->hfs_freeze_state != HFS_THAWED) {
+		if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
+		           PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
+			hfs_unlock_mount(hfsmp);
+			return EINTR;
+		}
+	}
+
+	// Stop new syncers from starting
+	hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
+
+	// Now wait for all syncers to finish
+	while (hfsmp->hfs_syncers) {
+		if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
+		           PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
+			hfs_thaw_locked(hfsmp);
+			hfs_unlock_mount(hfsmp);
+			return EINTR;
+		}
+	}
+	hfs_unlock_mount(hfsmp);
+
+	// flush things before we get started to try and prevent
+	// dirty data from being paged out while we're frozen.
+	// note: we can't do this once we're in the freezing state because
+	// other threads will need to take the global lock
+	vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
+
+	// Block everything in hfs_lock_global now
+	hfs_lock_mount(hfsmp);
+	hfsmp->hfs_freeze_state = HFS_FREEZING;
+	hfsmp->hfs_freezing_thread = current_thread();
+	hfs_unlock_mount(hfsmp);
+
+	/* Take the exclusive lock to flush out anything else that
+	   might have the global lock at the moment and also so we
+	   can flush the journal. */
+	hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
+	journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
+	hfs_unlock_global(hfsmp);
+
+	// don't need to iterate on all vnodes, we just need to
+	// wait for writes to the system files and the device vnode
+	//
+	// Now that journal flush waits for all metadata blocks to
+	// be written out, waiting for btree writes is probably no
+	// longer required.
+	if (HFSTOVCB(hfsmp)->extentsRefNum)
+		vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
+	if (HFSTOVCB(hfsmp)->catalogRefNum)
+		vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
+	if (HFSTOVCB(hfsmp)->allocationsRefNum)
+		vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
+	if (hfsmp->hfs_attribute_vp)
+		vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
+	vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
+
+	// We're done, mark frozen
+	hfs_lock_mount(hfsmp);
+	hfsmp->hfs_freeze_state = HFS_FROZEN;
+	hfsmp->hfs_freezing_proc = current_proc();
+	hfs_unlock_mount(hfsmp);
+
+	return 0;
+}
+
+__private_extern__
+int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
+{
+	hfs_lock_mount(hfsmp);
+
+	if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
+		hfs_unlock_mount(hfsmp);
+		return EINVAL;
+	}
+	if (process && hfsmp->hfs_freezing_proc != process) {
+		hfs_unlock_mount(hfsmp);
+		return EPERM;
+	}
+
+	hfs_thaw_locked(hfsmp);
+
+	hfs_unlock_mount(hfsmp);
+
+	return 0;
+}
+
+static void hfs_thaw_locked(struct hfsmount *hfsmp)
+{
+	hfsmp->hfs_freezing_proc = NULL;
+	hfsmp->hfs_freeze_state = HFS_THAWED;
+
+	wakeup(&hfsmp->hfs_freeze_state);
+}
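Finally, a hedged sketch of the freeze/thaw protocol as a backup or snapshot path might drive it; take_volume_snapshot() is a made-up placeholder, not an API introduced by this diff.

/*
 * Hypothetical sketch: pairing hfs_freeze() with hfs_thaw().
 */
static int
example_freeze_thaw(struct hfsmount *hfsmp)
{
	int error = hfs_freeze(hfsmp);       /* may return EINTR if signalled */
	if (error)
		return error;

	error = take_volume_snapshot(hfsmp); /* placeholder for real work */

	/* thaw as the freezing process so the EPERM check passes */
	(void) hfs_thaw(hfsmp, current_proc());

	return error;
}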