X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3a60a9f5b85abb8c2cf24e1926c5c7b3f608a5e2..0b4c1975fb5e4eccf1012a35081f7e7799b81046:/bsd/hfs/hfs_vfsops.c?ds=sidebyside diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index 7ebe8aff7..aaac6d0df 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2010 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1991, 1993, 1994 @@ -69,6 +75,7 @@ #include #include +#include #include #include #include @@ -79,6 +86,7 @@ #include #include #include +#include #include @@ -87,6 +95,9 @@ #include #include +#include +#include + #include "hfs.h" #include "hfs_catalog.h" #include "hfs_cnode.h" @@ -98,27 +109,24 @@ #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" - #if HFS_DIAGNOSTIC int hfs_dbg_all = 0; int hfs_dbg_err = 0; #endif +/* Enable/disable debugging code for live volume resizing */ +int hfs_resize_debug = 0; lck_grp_attr_t * hfs_group_attr; lck_attr_t * hfs_lock_attr; lck_grp_t * hfs_mutex_group; lck_grp_t * hfs_rwlock_group; - extern struct vnodeopv_desc hfs_vnodeop_opv_desc; +extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; -extern void hfs_converterinit(void); - -extern void inittodr(time_t base); - -extern int hfs_write_access(struct vnode *, kauth_cred_t, struct proc *, Boolean); - +/* not static so we can re-use in hfs_readwrite.c for build_path calls */ +int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct 
vnode **vpp, vfs_context_t context); @@ -127,8 +135,8 @@ static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush); static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp); static int hfs_init(struct vfsconf *vfsp); static int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); -static int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, vfs_context_t context); -static int hfs_reload(struct mount *mp, kauth_cred_t cred, struct proc *p); +static int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context); +static int hfs_reload(struct mount *mp); static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context); static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context); static int hfs_start(struct mount *mp, int flags, vfs_context_t context); @@ -137,10 +145,11 @@ static int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context); static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen, vfs_context_t context); static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); -static int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); -static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk); +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context); +static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID); +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context); /* @@ -155,8 +164,8 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) ExtendedVCB *vcb; struct vfsstatfs *vfsp; int error; - - if 
((error = hfs_mountfs(rvp, mp, NULL, context))) + + if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) return (error); /* Init hfsmp */ @@ -192,19 +201,19 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte struct hfsmount *hfsmp = NULL; struct hfs_mount_args args; int retval = E_NONE; - uint32_t cmdflags; + u_int32_t cmdflags; if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) { return (retval); } - cmdflags = (uint32_t)vfs_flags(mp) & MNT_CMDFLAGS; + cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS; if (cmdflags & MNT_UPDATE) { hfsmp = VFSTOHFS(mp); /* Reload incore data after an fsck. */ if (cmdflags & MNT_RELOAD) { if (vfs_isrdonly(mp)) - return hfs_reload(mp, vfs_context_ucred(context), p); + return hfs_reload(mp); else return (EINVAL); } @@ -214,19 +223,37 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte vfs_isrdonly(mp)) { int flags; + /* Set flag to indicate that a downgrade to read-only + * is in progress and therefore block any further + * modifications to the file system. 
+ */ + hfs_global_exclusive_lock_acquire(hfsmp); + hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_proc = current_thread(); + hfs_global_exclusive_lock_release(hfsmp); + /* use VFS_SYNC to push out System (btree) files */ retval = VFS_SYNC(mp, MNT_WAIT, context); - if (retval && ((cmdflags & MNT_FORCE) == 0)) + if (retval && ((cmdflags & MNT_FORCE) == 0)) { + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_proc = NULL; goto out; + } flags = WRITECLOSE; if (cmdflags & MNT_FORCE) flags |= FORCECLOSE; - if ((retval = hfs_flushfiles(mp, flags, p))) + if ((retval = hfs_flushfiles(mp, flags, p))) { + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_proc = NULL; goto out; - hfsmp->hfs_flags |= HFS_READ_ONLY; + } + + /* mark the volume cleanly unmounted */ + hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask; retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + hfsmp->hfs_flags |= HFS_READ_ONLY; /* also get the volume bitmap blocks */ if (!retval) { @@ -239,6 +266,8 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte } } if (retval) { + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_proc = NULL; hfsmp->hfs_flags &= ~HFS_READ_ONLY; goto out; } @@ -254,13 +283,22 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte hfs_global_exclusive_lock_release(hfsmp); } + + hfsmp->hfs_downgrading_proc = NULL; } /* Change to a writable file system. */ if (vfs_iswriteupgrade(mp)) { - retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); - if (retval != E_NONE) + + /* + * On inconsistent disks, do not allow read-write mount + * unless it is the boot volume being mounted. 
+ */ + if (!(vfs_flags(mp) & MNT_ROOTFS) && + (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) { + retval = EINVAL; goto out; + } // If the journal was shut-down previously because we were // asked to be read-only, let's start it back up again now @@ -282,7 +320,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, hfsmp->jnl_size, hfsmp->hfs_devvp, - hfsmp->hfs_phys_block_size, + hfsmp->hfs_logical_block_size, jflags, 0, hfs_sync_metadata, hfsmp->hfs_mp); @@ -298,20 +336,46 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte } - /* Only clear HFS_READ_ONLY after a successfull write */ + /* See if we need to erase unused Catalog nodes due to . */ + retval = hfs_erase_unused_nodes(hfsmp); + if (retval != E_NONE) + goto out; + + /* Only clear HFS_READ_ONLY after a successful write */ hfsmp->hfs_flags &= ~HFS_READ_ONLY; - if (!(hfsmp->hfs_flags & (HFS_READ_ONLY & HFS_STANDARD))) { - /* setup private/hidden directory for unlinked files */ - FindMetaDataDirectory(HFSTOVCB(hfsmp)); + /* If this mount point was downgraded from read-write + * to read-only, clear that information as we are now + * moving back to read-write. + */ + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_proc = NULL; + + /* mark the volume dirty (clear clean unmount bit) */ + hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask; + + retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + if (retval != E_NONE) + goto out; + + if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) { + /* Setup private/hidden directories for hardlinks. */ + hfs_privatedir_init(hfsmp, FILE_HARDLINKS); + hfs_privatedir_init(hfsmp, DIR_HARDLINKS); + hfs_remove_orphans(hfsmp); /* * Allow hot file clustering if conditions allow. 
*/ - if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && + ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) { (void) hfs_recording_init(hfsmp); } + /* Force ACLs on HFS+ file systems. */ + if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) { + vfs_setextendedsecurity(HFSTOVFS(hfsmp)); + } } } @@ -321,9 +385,9 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte } else /* not an update request */ { /* Set the mount flag to indicate that we support volfs */ - vfs_setflags(mp, (uint64_t)((unsigned int)MNT_DOVOLFS)); + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS)); - retval = hfs_mountfs(devvp, mp, &args, context); + retval = hfs_mountfs(devvp, mp, &args, 0, context); } out: if (retval == 0) { @@ -348,13 +412,18 @@ hfs_changefs_callback(struct vnode *vp, void *cargs) struct cat_desc cndesc; struct cat_attr cnattr; struct hfs_changefs_cargs *args; + int lockflags; + int error; args = (struct hfs_changefs_cargs *)cargs; cp = VTOC(vp); vcb = HFSTOVCB(args->hfsmp); - if (cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL)) { + lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL); + hfs_systemfile_unlock(args->hfsmp, lockflags); + if (error) { /* * If we couldn't find this guy skip to the next one */ @@ -381,7 +450,7 @@ hfs_changefs_callback(struct vnode *vp, void *cargs) replace_desc(cp, &cndesc); if (cndesc.cd_cnid == kHFSRootFolderID) { - strncpy(vcb->vcbVN, cp->c_desc.cd_nameptr, NAME_MAX); + strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1); cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding; } } else { @@ -400,14 +469,16 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) ExtendedVCB *vcb; hfs_to_unicode_func_t get_unicode_func; unicode_to_hfs_func_t get_hfsname_func; - u_long old_encoding = 0; + u_int32_t old_encoding = 0; struct 
hfs_changefs_cargs cargs; - uint32_t mount_flags; + u_int32_t mount_flags; hfsmp = VFSTOHFS(mp); vcb = HFSTOVCB(hfsmp); mount_flags = (unsigned int)vfs_flags(mp); + hfsmp->hfs_flags |= HFS_IN_CHANGEFS; + permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) && ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) || (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) && @@ -415,8 +486,9 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) /* The root filesystem must operate with actual permissions: */ if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) { - vfs_clearflags(mp, (uint64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */ - return EINVAL; + vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */ + retval = EINVAL; + goto exit; } if (mount_flags & MNT_UNKNOWNPERMISSIONS) hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; @@ -425,6 +497,14 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) namefix = permfix = 0; + /* + * Tracking of hot files requires up-to-date access times. So if + * access time updates are disabled, we must also disable hot files. 
+ */ + if (mount_flags & MNT_NOATIME) { + (void) hfs_recording_suspend(hfsmp); + } + /* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */ if (args->hfs_timezone.tz_minuteswest != VNOVAL) { gTimeZone = args->hfs_timezone; @@ -454,7 +534,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) /* Change the hfs encoding value (hfs only) */ if ((vcb->vcbSigWord == kHFSSigWord) && - (args->hfs_encoding != (u_long)VNOVAL) && + (args->hfs_encoding != (u_int32_t)VNOVAL) && (hfsmp->hfs_encoding != args->hfs_encoding)) { retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func); @@ -494,8 +574,9 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) * * hfs_changefs_callback will be called for each vnode * hung off of this mount point - * the vnode will be - * properly referenced and unreferenced around the callback + * + * The vnode will be properly referenced and unreferenced + * around the callback */ cargs.hfsmp = hfsmp; cargs.namefix = namefix; @@ -515,14 +596,13 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) (void) hfs_relconverter(old_encoding); } exit: + hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS; return (retval); } struct hfs_reload_cargs { struct hfsmount *hfsmp; - kauth_cred_t cred; - struct proc *p; int error; }; @@ -531,6 +611,7 @@ hfs_reload_callback(struct vnode *vp, void *cargs) { struct cnode *cp; struct hfs_reload_cargs *args; + int lockflags; args = (struct hfs_reload_cargs *)cargs; /* @@ -548,15 +629,19 @@ hfs_reload_callback(struct vnode *vp, void *cargs) /* * Re-read cnode data for all active vnodes (non-metadata files). */ - if (!VNODE_IS_RSRC(vp)) { + if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp)) { struct cat_fork *datafork; struct cat_desc desc; datafork = cp->c_datafork ? 
&cp->c_datafork->ff_data : NULL; /* lookup by fileID since name could have changed */ - if ((args->error = cat_idlookup(args->hfsmp, cp->c_fileid, &desc, &cp->c_attr, datafork))) + lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork); + hfs_systemfile_unlock(args->hfsmp, lockflags); + if (args->error) { return (VNODE_RETURNED_DONE); + } /* update cnode's catalog descriptor */ (void) replace_desc(cp, &desc); @@ -579,11 +664,10 @@ hfs_reload_callback(struct vnode *vp, void *cargs) * re-read cnode data for all active vnodes. */ static int -hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) +hfs_reload(struct mount *mountp) { register struct vnode *devvp; struct buf *bp; - int sectorsize; int error, i; struct hfsmount *hfsmp; struct HFSPlusVolumeHeader *vhp; @@ -591,7 +675,7 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) struct filefork *forkp; struct cat_desc cndesc; struct hfs_reload_cargs args; - int lockflags; + daddr64_t priIDSector; hfsmp = VFSTOHFS(mountp); vcb = HFSTOVCB(hfsmp); @@ -607,8 +691,6 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) panic("hfs_reload: dirty1"); args.hfsmp = hfsmp; - args.cred = cred; - args.p = p; args.error = 0; /* * hfs_reload_callback will be called for each vnode @@ -617,9 +699,7 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) * the vnode will be in an 'unbusy' state (VNODE_WAIT) and * properly referenced and unreferenced around the callback */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args); - hfs_systemfile_unlock(hfsmp, lockflags); if (args.error) return (args.error); @@ -627,18 +707,19 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) /* * Re-read VolumeHeader from disk. 
*/ - sectorsize = hfsmp->hfs_phys_block_size; + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); error = (int)buf_meta_bread(hfsmp->hfs_devvp, - (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + HFS_PRI_SECTOR(sectorsize)), - sectorsize, NOCRED, &bp); + HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); if (error) { if (bp != NULL) buf_brelse(bp); return (error); } - vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize)); + vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); /* Do a quick sanity check */ if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord && @@ -659,7 +740,7 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount); vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount); vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount); - vcb->nextAllocation = SWAP_BE32 (vhp->nextAllocation); + HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation)); vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks); vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks); vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap); @@ -737,14 +818,15 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) } /* Reload the volume name */ - if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, &cndesc, NULL, NULL))) + if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL))) return (error); vcb->volumeNameEncodingHint = cndesc.cd_encoding; bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen)); cat_releasedesc(&cndesc); - /* Re-establish private/hidden directory for unlinked files */ - FindMetaDataDirectory(vcb); + /* Re-establish private/hidden directories. 
*/ + hfs_privatedir_init(hfsmp, FILE_HARDLINKS); + hfs_privatedir_init(hfsmp, DIR_HARDLINKS); /* In case any volume information changed to trigger a notification */ hfs_generate_volume_notifications(hfsmp); @@ -753,29 +835,161 @@ hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) } + +static void +hfs_syncer(void *arg0, void *unused) +{ +#pragma unused(unused) + + struct hfsmount *hfsmp = arg0; + clock_sec_t secs; + clock_usec_t usecs; + uint32_t delay = HFS_META_DELAY; + uint64_t now; + static int no_max=1; + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + + // + // If the amount of pending writes is more than our limit, wait + // for 2/3 of it to drain and then flush the journal. + // + if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) { + int counter=0; + uint64_t pending_io, start, rate; + + no_max = 0; + + hfs_start_transaction(hfsmp); // so we hold off any new i/o's + + pending_io = hfsmp->hfs_mp->mnt_pending_write_size; + + clock_get_calendar_microtime(&secs, &usecs); + start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + + while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) { + tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10); + } + + if (counter >= 500) { + printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size); + } + + if (hfsmp->jnl) { + journal_flush(hfsmp->jnl); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); + } + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; + rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second + + hfs_end_transaction(hfsmp); + + // + // If a reasonable amount of time elapsed then check the + // i/o rate. 
If it's taking less than 1 second or more + // than 2 seconds, adjust hfs_max_pending_io so that we + // will allow about 1.5 seconds of i/o to queue up. + // + if ((now - start) >= 300000) { + uint64_t scale = (pending_io * 100) / rate; + + if (scale < 100 || scale > 200) { + // set it so that it should take about 1.5 seconds to drain + hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL; + } + } + + } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL) + || (((now - hfsmp->hfs_last_sync_time) >= 100000LL) + && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL) + && (hfsmp->hfs_active_threads == 0) + && (hfsmp->hfs_global_lock_nesting == 0))) { + + // + // Flush the journal if more than 5 seconds elapsed since + // the last sync OR we have not sync'ed recently and the + // last sync request time was more than 100 milliseconds + // ago and no one is in the middle of a transaction right + // now. Else we defer the sync and reschedule it. + // + if (hfsmp->jnl) { + lck_rw_lock_shared(&hfsmp->hfs_global_lock); + + journal_flush(hfsmp->jnl); + + lck_rw_unlock_shared(&hfsmp->hfs_global_lock); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); + } + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; + + } else if (hfsmp->hfs_active_threads == 0) { + uint64_t deadline; + + clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline); + thread_call_enter_delayed(hfsmp->hfs_syncer, deadline); + + // note: we intentionally return early here and do not + // decrement the sync_scheduled and sync_incomplete + // variables because we rescheduled the timer. + + return; + } + + // + // NOTE: we decrement these *after* we're done the journal_flush() since + // it can take a significant amount of time and so we don't want more + // callbacks scheduled until we're done this one. 
+ // + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); + wakeup((caddr_t)&hfsmp->hfs_sync_incomplete); +} + + +extern int IOBSDIsMediaEjectable( const char *cdev_name ); + /* * Common code for mount and mountroot */ static int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, - vfs_context_t context) + int journal_replay_only, vfs_context_t context) { struct proc *p = vfs_context_proc(context); int retval = E_NONE; - struct hfsmount *hfsmp; + struct hfsmount *hfsmp = NULL; struct buf *bp; dev_t dev; - HFSMasterDirectoryBlock *mdbp; + HFSMasterDirectoryBlock *mdbp = NULL; int ronly; +#if QUOTA int i; +#endif int mntwrapper; kauth_cred_t cred; u_int64_t disksize; - daddr64_t blkcnt; - u_int32_t blksize; + daddr64_t log_blkcnt; + u_int32_t log_blksize; + u_int32_t phys_blksize; u_int32_t minblksize; u_int32_t iswritable; daddr64_t mdb_offset; + int isvirtual = 0; + int isroot = 0; + + if (args == NULL) { + /* only hfs_mountroot passes us NULL as the 'args' argument */ + isroot = 1; + } ronly = vfs_isrdonly(mp); dev = vnode_specrdev(devvp); @@ -790,13 +1004,37 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* Advisory locking should be handled at the VFS layer */ vfs_setlocklocal(mp); - /* Get the real physical block size. */ - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, context)) { + /* Get the logical block size (treated as physical block size everywhere) */ + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) { + retval = ENXIO; + goto error_exit; + } + if (log_blksize == 0 || log_blksize > 1024*1024*1024) { + printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize); + retval = ENXIO; + goto error_exit; + } + + /* Get the physical block size. 
*/ + retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context); + if (retval) { + if ((retval != ENOTSUP) && (retval != ENOTTY)) { + retval = ENXIO; + goto error_exit; + } + /* If device does not support this ioctl, assume that physical + * block size is same as logical block size + */ + phys_blksize = log_blksize; + } + if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) { + printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize); retval = ENXIO; goto error_exit; } + /* Switch to 512 byte sectors (temporarily) */ - if (blksize > 512) { + if (log_blksize > 512) { u_int32_t size512 = 512; if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) { @@ -805,31 +1043,50 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } } /* Get the number of 512 byte physical blocks. */ - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { + /* resetting block size may fail if getting block count did */ + (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context); + retval = ENXIO; goto error_exit; } /* Compute an accurate disk size (i.e. within 512 bytes) */ - disksize = (u_int64_t)blkcnt * (u_int64_t)512; + disksize = (u_int64_t)log_blkcnt * (u_int64_t)512; /* * On Tiger it is not necessary to switch the device * block size to be 4k if there are more than 31-bits * worth of blocks but to insure compatibility with * pre-Tiger systems we have to do it. + * + * If the device size is not a multiple of 4K (8 * 512), then + * switching the logical block size isn't going to help because + * we will be unable to write the alternate volume header. + * In this case, just leave the logical block size unchanged. 
*/ - if (blkcnt > (u_int64_t)0x000000007fffffff) { - minblksize = blksize = 4096; + if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) { + minblksize = log_blksize = 4096; + if (phys_blksize < log_blksize) + phys_blksize = log_blksize; } - /* Now switch to our prefered physical block size. */ - if (blksize > 512) { - if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) { + /* + * The cluster layer is not currently prepared to deal with a logical + * block size larger than the system's page size. (It can handle + * blocks per page, but not multiple pages per block.) So limit the + * logical block size to the page size. + */ + if (log_blksize > PAGE_SIZE) + log_blksize = PAGE_SIZE; + + /* Now switch to our preferred physical block size. */ + if (log_blksize > 512) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } /* Get the count of physical blocks. */ - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } @@ -837,22 +1094,34 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * At this point: * minblksize is the minimum physical block size - * blksize has our prefered physical block size - * blkcnt has the total number of physical blocks + * log_blksize has our preferred physical block size + * log_blkcnt has the total number of physical blocks */ - mdb_offset = (daddr64_t)HFS_PRI_SECTOR(blksize); - if ((retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp))) { + mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize); + if ((retval = (int)buf_meta_bread(devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), + phys_blksize, cred, &bp))) { goto error_exit; } MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK); - bcopy((char *)buf_dataptr(bp) + 
HFS_PRI_OFFSET(blksize), mdbp, kMDBSize); + if (mdbp == NULL) { + retval = ENOMEM; + goto error_exit; + } + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize); buf_brelse(bp); bp = NULL; MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK); + if (hfsmp == NULL) { + retval = ENOMEM; + goto error_exit; + } bzero(hfsmp, sizeof(struct hfsmount)); + hfs_chashinit_finish(hfsmp); + /* * Init the volume information structure */ @@ -866,15 +1135,21 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */ hfsmp->hfs_raw_dev = vnode_specrdev(devvp); hfsmp->hfs_devvp = devvp; - hfsmp->hfs_phys_block_size = blksize; - hfsmp->hfs_phys_block_count = blkcnt; + vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */ + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_logical_block_count = log_blkcnt; + hfsmp->hfs_physical_block_size = phys_blksize; + hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize); hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; if (ronly) hfsmp->hfs_flags |= HFS_READ_ONLY; if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; + +#if QUOTA for (i = 0; i < MAXQUOTAS; i++) dqfileinit(&hfsmp->hfs_qfiles[i]); +#endif if (args) { hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? 
UNKNOWNUID : args->hfs_uid; @@ -915,32 +1190,48 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } // record the current time at which we're mounting this volume - { - struct timeval tv; - microtime(&tv); - hfsmp->hfs_mount_time = tv.tv_sec; - } + struct timeval tv; + microtime(&tv); + hfsmp->hfs_mount_time = tv.tv_sec; /* Mount a standard HFS disk */ if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) { + + /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */ + if (vfs_isrdwr(mp)) { + retval = EROFS; + goto error_exit; + } + /* Treat it as if it's read-only and not writeable */ + hfsmp->hfs_flags |= HFS_READ_ONLY; + hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; + + /* If only journal replay is requested, exit immediately */ + if (journal_replay_only) { + retval = 0; + goto error_exit; + } + if ((vfs_flags(mp) & MNT_ROOTFS)) { retval = EINVAL; /* Cannot root from HFS standard disks */ goto error_exit; } /* HFS disks can only use 512 byte physical blocks */ - if (blksize > kHFSBlockSize) { - blksize = kHFSBlockSize; - if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) { + if (log_blksize > kHFSBlockSize) { + log_blksize = kHFSBlockSize; + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } - hfsmp->hfs_phys_block_size = blksize; - hfsmp->hfs_phys_block_count = blkcnt; + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_logical_block_count = log_blkcnt; + hfsmp->hfs_physical_block_size = log_blksize; + hfsmp->hfs_log_per_phys = 1; } if (args) { hfsmp->hfs_encoding = args->hfs_encoding; @@ -976,37 +1267,43 @@ hfs_mountfs(struct vnode *devvp, struct 
mount *mp, struct hfs_mount_args *args, * block size so everything will line up on a block * boundary. */ - if ((embeddedOffset % blksize) != 0) { - printf("HFS Mount: embedded volume offset not" + if ((embeddedOffset % log_blksize) != 0) { + printf("hfs_mountfs: embedded volume offset not" " a multiple of physical block size (%d);" - " switching to 512\n", blksize); - blksize = 512; + " switching to 512\n", log_blksize); + log_blksize = 512; if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&blksize, FWRITE, context)) { + (caddr_t)&log_blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, - (caddr_t)&blkcnt, 0, context)) { + (caddr_t)&log_blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } /* Note: relative block count adjustment */ - hfsmp->hfs_phys_block_count *= - hfsmp->hfs_phys_block_size / blksize; - hfsmp->hfs_phys_block_size = blksize; + hfsmp->hfs_logical_block_count *= + hfsmp->hfs_logical_block_size / log_blksize; + + /* Update logical /physical block size */ + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_physical_block_size = log_blksize; + phys_blksize = log_blksize; + hfsmp->hfs_log_per_phys = 1; } disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) * (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz); - hfsmp->hfs_phys_block_count = disksize / blksize; + hfsmp->hfs_logical_block_count = disksize / log_blksize; - mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); - retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); + mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); + retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + phys_blksize, cred, &bp); if (retval) goto error_exit; - bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512); + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512); buf_brelse(bp); bp = NULL; vhp = 
(HFSPlusVolumeHeader*) mdbp; @@ -1016,11 +1313,29 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, vhp = (HFSPlusVolumeHeader*) mdbp; } + /* + * On inconsistent disks, do not allow read-write mount + * unless it is the boot volume being mounted. We also + * always want to replay the journal if the journal_replay_only + * flag is set because that will (most likely) get the + * disk into a consistent state before fsck_hfs starts + * looking at it. + */ + if ( !(vfs_flags(mp) & MNT_ROOTFS) + && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) + && !journal_replay_only + && !(hfsmp->hfs_flags & HFS_READ_ONLY)) { + retval = EINVAL; + goto error_exit; + } + + // XXXdbg // hfsmp->jnl = NULL; hfsmp->jvp = NULL; - if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) && args->journal_disable) { + if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) && + args->journal_disable) { jnl_disable = 1; } @@ -1040,9 +1355,17 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, // if we're able to init the journal, mark the mount // point as journaled. // - if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { - vfs_setflags(mp, (uint64_t)((unsigned int)MNT_JOURNALED)); + if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) { + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); } else { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + retval = EINVAL; + goto error_exit; + } + // if the journal failed to open, then set the lastMountedVersion // to be "FSK!" which fsck_hfs will see and force the fsck instead // of just bailing out because the volume is journaled. 
@@ -1052,13 +1375,15 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; if (mdb_offset == 0) { - mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); + mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); } bp = NULL; - retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); + retval = (int)buf_meta_bread(devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + phys_blksize, cred, &bp); if (retval == 0) { - jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize)); + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize)); if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n"); @@ -1087,6 +1412,15 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } // XXXdbg + /* Either the journal is replayed successfully, or there + * was nothing to replay, or no journal exists. In any case, + * return success. + */ + if (journal_replay_only) { + retval = 0; + goto error_exit; + } + (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname); retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); @@ -1094,29 +1428,30 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, * If the backend didn't like our physical blocksize * then retry with physical blocksize of 512. 
*/ - if ((retval == ENXIO) && (blksize > 512) && (blksize != minblksize)) { - printf("HFS Mount: could not use physical block size " - "(%d) switching to 512\n", blksize); - blksize = 512; - if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) { + if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) { + printf("hfs_mountfs: could not use physical block size " + "(%d) switching to 512\n", log_blksize); + log_blksize = 512; + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } - devvp->v_specsize = blksize; + devvp->v_specsize = log_blksize; /* Note: relative block count adjustment (in case this is an embedded volume). */ - hfsmp->hfs_phys_block_count *= hfsmp->hfs_phys_block_size / blksize; - hfsmp->hfs_phys_block_size = blksize; + hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize; + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize; - if (hfsmp->jnl) { + if (hfsmp->jnl && hfsmp->jvp == devvp) { // close and re-open this with the new block size journal_close(hfsmp->jnl); hfsmp->jnl = NULL; if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { - vfs_setflags(mp, (uint64_t)((unsigned int)MNT_JOURNALED)); + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); } else { // if the journal failed to open, then set the lastMountedVersion // to be "FSK!" 
which fsck_hfs will see and force the fsck instead @@ -1127,13 +1462,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; if (mdb_offset == 0) { - mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); + mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); } bp = NULL; - retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); + retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + phys_blksize, cred, &bp); if (retval == 0) { - jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize)); + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize)); if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n"); @@ -1179,19 +1515,31 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); vfs_setmaxsymlen(mp, 0); - mp->mnt_vtable->vfc_threadsafe = TRUE; + mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR; +#if NAMEDSTREAMS + mp->mnt_kern_flag |= MNTK_NAMED_STREAMS; +#endif + if (!(hfsmp->hfs_flags & HFS_STANDARD)) { + /* Tell VFS that we support directory hard links. */ + mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS; + } else { + /* HFS standard doesn't support extended readdir! */ + mp->mnt_vtable->vfc_vfsflags &= ~VFC_VFSREADDIR_EXTENDED; + } if (args) { /* * Set the free space warning levels for a non-root volume: * - * Set the lower freespace limit (the level that will trigger a warning) - * to 5% of the volume size or 250MB, whichever is less, and the desired - * level (which will cancel the alert request) to 1/2 above that limit. 
- * Start looking for free space to drop below this level and generate a - * warning immediately if needed: + * Set the "danger" limit to 1% of the volume size or 100MB, whichever + * is less. Set the "warning" limit to 2% of the volume size or 150MB, + * whichever is less. And last, set the "desired" freespace level to + * to 3% of the volume size or 200MB, whichever is less. */ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION); hfsmp->hfs_freespace_notify_warninglimit = MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION); @@ -1202,10 +1550,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * Set the free space warning levels for the root volume: * - * Set the lower freespace limit (the level that will trigger a warning) - * to 1% of the volume size or 50MB, whichever is less, and the desired - * level (which will cancel the alert request) to 2% or 75MB, whichever is less. + * Set the "danger" limit to 5% of the volume size or 125MB, whichever + * is less. Set the "warning" limit to 10% of the volume size or 250MB, + * whichever is less. And last, set the "desired" freespace level to + * to 11% of the volume size or 375MB, whichever is less. 
*/ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION); hfsmp->hfs_freespace_notify_warninglimit = MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION); @@ -1214,13 +1566,33 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION); }; + /* Check if the file system exists on virtual device, like disk image */ + if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) { + if (isvirtual) { + hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE; + } + } + + /* do not allow ejectability checks on the root device */ + if (isroot == 0) { + if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 && + IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) { + hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with. + hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp); + if (hfsmp->hfs_syncer == NULL) { + printf("hfs: failed to allocate syncer thread callback for %s (%s)\n", + mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); + } + } + } + /* * Start looking for free space to drop below this level and generate a * warning immediately if needed: */ hfsmp->hfs_notification_conditions = 0; hfs_generate_volume_notifications(hfsmp); - + if (ronly == 0) { (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); } @@ -1234,10 +1606,16 @@ error_exit: FREE(mdbp, M_TEMP); if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(hfsmp->jvp); + (void)VNOP_CLOSE(hfsmp->jvp, ronly ? 
FREAD : FREAD|FWRITE, vfs_context_kernel()); hfsmp->jvp = NULL; } if (hfsmp) { + if (hfsmp->hfs_devvp) { + vnode_rele(hfsmp->hfs_devvp); + } + hfs_delete_chash(hfsmp); + FREE(hfsmp, M_HFSMNT); vfs_setfsprivate(mp, NULL); } @@ -1283,12 +1661,56 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) if (hfsmp->hfs_flags & HFS_METADATA_ZONE) (void) hfs_recording_suspend(hfsmp); + /* + * Cancel any pending timers for this volume. Then wait for any timers + * which have fired, but whose callbacks have not yet completed. + */ + if (hfsmp->hfs_syncer) + { + struct timespec ts = {0, 100000000}; /* 0.1 seconds */ + + /* + * Cancel any timers that have been scheduled, but have not + * fired yet. NOTE: The kernel considers a timer complete as + * soon as it starts your callback, so the kernel does not + * keep track of the number of callbacks in progress. + */ + if (thread_call_cancel(hfsmp->hfs_syncer)) + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); + thread_call_free(hfsmp->hfs_syncer); + hfsmp->hfs_syncer = NULL; + + /* + * This waits for all of the callbacks that were entered before + * we did thread_call_cancel above, but have not completed yet. 
+ */ + while(hfsmp->hfs_sync_incomplete > 0) + { + msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts); + } + + if (hfsmp->hfs_sync_incomplete < 0) + panic("hfs_unmount: pm_sync_incomplete underflow!\n"); + } + /* * Flush out the b-trees, volume bitmap and Volume Header */ if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - hfs_start_transaction(hfsmp); - started_tr = 1; + retval = hfs_start_transaction(hfsmp); + if (retval == 0) { + started_tr = 1; + } else if (!force) { + goto err_exit; + } + + if (hfsmp->hfs_startup_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK); + retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_startup_vp)); + if (retval && !force) + goto err_exit; + } if (hfsmp->hfs_attribute_vp) { (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK); @@ -1323,16 +1745,33 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) if (retval && !force) goto err_exit; } -#if 0 - /* See if this volume is damaged, is so do not unmount cleanly */ - if (HFSTOVCB(hfsmp)->vcbFlags & kHFS_DamagedVolume) { + + /* If runtime corruption was detected, indicate that the volume + * was not unmounted cleanly. 
+ */ + if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) { HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; } else { HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; } -#else - HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; -#endif + + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + int i; + u_int32_t min_start = hfsmp->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) { + if (hfsmp->vcbFreeExt[i].startBlock < min_start) { + min_start = hfsmp->vcbFreeExt[i].startBlock; + } + } + if (min_start < hfsmp->nextAllocation) { + hfsmp->nextAllocation = min_start; + } + } + + retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); if (retval) { HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; @@ -1340,12 +1779,14 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) goto err_exit; /* could not flush everything */ } - hfs_end_transaction(hfsmp); - started_tr = 0; + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = 0; + } } if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } /* @@ -1370,9 +1811,10 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { + vnode_clearmountedon(hfsmp->jvp); retval = VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? 
FREAD : FREAD|FWRITE, - context); + vfs_context_kernel()); vnode_put(hfsmp->jvp); hfsmp->jvp = NULL; } @@ -1390,6 +1832,9 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } #endif /* HFS_SPARSE_DEV */ lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); + vnode_rele(hfsmp->hfs_devvp); + + hfs_delete_chash(hfsmp); FREE(hfsmp, M_HFSMNT); return (0); @@ -1415,16 +1860,20 @@ hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t contex /* * Do operations associated with quotas */ +#if !QUOTA +static int +hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context) +{ + return (ENOTSUP); +} +#else static int hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context) { struct proc *p = vfs_context_proc(context); int cmd, type, error; -#if !QUOTA - return (ENOTSUP); -#else - if (uid == -1) + if (uid == ~0U) uid = vfs_context_ucred(context)->cr_ruid; cmd = cmds >> SUBCMDSHIFT; @@ -1484,8 +1933,8 @@ hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t vfs_unbusy(mp); return (error); -#endif /* QUOTA */ } +#endif /* QUOTA */ /* Subtype is composite of bits */ #define HFS_SUBTYPE_JOURNALED 0x01 @@ -1501,18 +1950,18 @@ hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_contex { ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); - u_long freeCNIDs; - uint16_t subtype = 0; + u_int32_t freeCNIDs; + u_int16_t subtype = 0; - freeCNIDs = (u_long)0xFFFFFFFF - (u_long)vcb->vcbNxtCNID; + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID; - sbp->f_bsize = (uint32_t)vcb->blockSize; - sbp->f_iosize = (size_t)(MAX_UPL_TRANSFER * PAGE_SIZE); - sbp->f_blocks = (uint64_t)((unsigned long)vcb->totalBlocks); - sbp->f_bfree = (uint64_t)((unsigned long )hfs_freeblks(hfsmp, 0)); - sbp->f_bavail = (uint64_t)((unsigned long )hfs_freeblks(hfsmp, 1)); - sbp->f_files = 
(uint64_t)((unsigned long )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ - sbp->f_ffree = (uint64_t)((unsigned long )(MIN(freeCNIDs, sbp->f_bavail))); + sbp->f_bsize = (u_int32_t)vcb->blockSize; + sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0); + sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks); + sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0)); + sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1)); + sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ + sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail))); /* * Subtypes (flavors) for HFS @@ -1554,19 +2003,21 @@ hfs_sync_metadata(void *arg) struct hfsmount *hfsmp; ExtendedVCB *vcb; buf_t bp; - int sectorsize, retval; + int retval; daddr64_t priIDSector; hfsmp = VFSTOHFS(mp); vcb = HFSTOVCB(hfsmp); // now make sure the super block is flushed - sectorsize = hfsmp->hfs_phys_block_size; - priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_PRI_SECTOR(sectorsize)); - retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); - if (retval != 0) { - panic("hfs: sync_metadata: can't read super-block?! (retval 0x%x, priIDSector)\n", - retval, priIDSector); + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); + + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if ((retval != 0 ) && (retval != ENXIO)) { + printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n", + (int)priIDSector, retval); } if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { @@ -1580,7 +2031,9 @@ hfs_sync_metadata(void *arg) // hfs_btreeio.c:FlushAlternate() should flag when it was // written... 
if (hfsmp->hfs_alt_id_sector) { - retval = (int)buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &bp); + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { buf_bwrite(bp); } else if (bp) { @@ -1645,14 +2098,14 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) int error, allerror = 0; struct hfs_sync_cargs args; + hfsmp = VFSTOHFS(mp); + /* - * During MNT_UPDATE hfs_changefs might be manipulating - * vnodes so back off + * hfs_changefs might be manipulating vnodes so back off */ - if (((uint32_t)vfs_flags(mp)) & MNT_UPDATE) /* XXX MNT_UPDATE may not be visible here */ + if (hfsmp->hfs_flags & HFS_IN_CHANGEFS) return (0); - hfsmp = VFSTOHFS(mp); if (hfsmp->hfs_flags & HFS_READ_ONLY) return (EROFS); @@ -1660,7 +2113,7 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync)) return 0; - args.cred = vfs_context_proc(context); + args.cred = kauth_cred_get(); args.waitfor = waitfor; args.p = p; args.error = 0; @@ -1725,11 +2178,11 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) hfs_qsync(mp); #endif /* QUOTA */ - hfs_hotfilesync(hfsmp, p); + hfs_hotfilesync(hfsmp, vfs_context_kernel()); + /* * Write back modified superblock. 
*/ - if (IsVCBDirty(vcb)) { error = hfs_flushvolumeheader(hfsmp, waitfor, 0); if (error) @@ -1737,7 +2190,17 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) } if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); + } + + { + clock_sec_t secs; + clock_usec_t usecs; + uint64_t now; + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; } lck_rw_unlock_shared(&hfsmp->hfs_insync); @@ -1756,7 +2219,7 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) * those rights via. exflagsp and credanonp */ static int -hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context) +hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context) { struct hfsfid *hfsfhp; struct vnode *nvp; @@ -1765,32 +2228,29 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, *vpp = NULL; hfsfhp = (struct hfsfid *)fhp; - if (fhlen < sizeof(struct hfsfid)) + if (fhlen < (int)sizeof(struct hfsfid)) return (EINVAL); - result = hfs_vget(VFSTOHFS(mp), hfsfhp->hfsfid_cnid, &nvp, 0); + result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0); if (result) { if (result == ENOENT) result = ESTALE; return result; } - - /* The createtime can be changed by hfs_setattr or hfs_setattrlist. - * For NFS, we are assuming that only if the createtime was moved - * forward would it mean the fileID got reused in that session by - * wrapping. We don't have a volume ID or other unique identifier to - * to use here for a generation ID across reboots, crashes where - * metadata noting lastFileID didn't make it to disk but client has - * it, or volume erasures where fileIDs start over again. Lastly, - * with HFS allowing "wraps" of fileIDs now, this becomes more - * error prone. 
Future, would be change the "wrap bit" to a unique - * wrap number and use that for generation number. For now do this. - */ - if ((hfsfhp->hfsfid_gen < VTOC(nvp)->c_itime)) { - hfs_unlock(VTOC(nvp)); - vnode_put(nvp); - return (ESTALE); - } + + /* + * We used to use the create time as the gen id of the file handle, + * but it is not static enough because it can change at any point + * via system calls. We still don't have another volume ID or other + * unique identifier to use for a generation ID across reboots that + * persists until the file is removed. Using only the CNID exposes + * us to the potential wrap-around case, but as of 2/2008, it would take + * over 2 months to wrap around if the machine did nothing but allocate + * CNIDs. Using some kind of wrap counter would only be effective if + * each file had the wrap counter associated with it. For now, + * we use only the CNID to identify the file as it's good enough. + */ + *vpp = nvp; hfs_unlock(VTOC(nvp)); @@ -1803,7 +2263,7 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, */ /* ARGSUSED */ static int -hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context) +hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context) { struct cnode *cp; struct hfsfid *hfsfhp; @@ -1816,8 +2276,9 @@ hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t cont cp = VTOC(vp); hfsfhp = (struct hfsfid *)fhp; - hfsfhp->hfsfid_cnid = cp->c_fileid; - hfsfhp->hfsfid_gen = cp->c_itime; + /* only the CNID is used to identify the file now */ + hfsfhp->hfsfid_cnid = htonl(cp->c_fileid); + hfsfhp->hfsfid_gen = htonl(cp->c_fileid); *fhlenp = sizeof(struct hfsfid); return (0); @@ -1837,9 +2298,6 @@ hfs_init(__unused struct vfsconf *vfsp) done = 1; hfs_chashinit(); hfs_converterinit(); -#if QUOTA - dqinit(); -#endif /* QUOTA */ BTReserveSetup(); @@ -1848,18 +2306,16 @@ hfs_init(__unused struct vfsconf *vfsp) hfs_group_attr = 
lck_grp_attr_alloc_init(); hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr); - - /* Turn on lock debugging */ - //lck_attr_setdebug(hfs_lock_attr); - + +#if HFS_COMPRESSION + decmpfs_init(); +#endif return (0); } static int -hfs_getmountpoint(vp, hfsmpp) - struct vnode *vp; - struct hfsmount **hfsmpp; +hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp) { struct hfsmount * hfsmp; char fstypename[MFSNAMELEN]; @@ -1871,7 +2327,7 @@ hfs_getmountpoint(vp, hfsmpp) return (EINVAL); vnode_vfsname(vp, fstypename); - if (strcmp(fstypename, "hfs") != 0) + if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0) return (EINVAL); hfsmp = VTOHFS(vp); @@ -1901,7 +2357,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, /* all sysctl names at this level are terminal */ if (name[0] == HFS_ENCODINGBIAS) { - u_int32_t bias; + int bias; bias = hfs_getencodingbias(); error = sysctl_int(oldp, oldlenp, newp, newlen, &bias); @@ -1911,13 +2367,13 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, } else if (name[0] == HFS_EXTEND_FS) { u_int64_t newsize; - vnode_t vp = p->p_fd->fd_cdir; + vnode_t vp = vfs_context_cwd(context); - if (newp == USER_ADDR_NULL || vp == NULL) + if (newp == USER_ADDR_NULL || vp == NULLVP) return (EINVAL); if ((error = hfs_getmountpoint(vp, &hfsmp))) return (error); - error = sysctl_quad(oldp, oldlenp, newp, newlen, &newsize); + error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize); if (error) return (error); @@ -1928,29 +2384,45 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, size_t bufsize; size_t bytes; u_int32_t hint; - u_int16_t *unicode_name; - char *filename; + u_int16_t *unicode_name = NULL; + char *filename = NULL; + + if ((newlen <= 0) || (newlen > MAXPATHLEN)) + return (EINVAL); bufsize = MAX(newlen * 3, MAXPATHLEN); MALLOC(filename, char *, 
newlen, M_TEMP, M_WAITOK); + if (filename == NULL) { + error = ENOMEM; + goto encodinghint_exit; + } MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK); + if (filename == NULL) { + error = ENOMEM; + goto encodinghint_exit; + } error = copyin(newp, (caddr_t)filename, newlen); if (error == 0) { - error = utf8_decodestr(filename, newlen - 1, unicode_name, + error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name, &bytes, bufsize, 0, UTF_DECOMPOSED); if (error == 0) { hint = hfs_pickencoding(unicode_name, bytes / 2); - error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, &hint); + error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint); } } - FREE(unicode_name, M_TEMP); - FREE(filename, M_TEMP); + +encodinghint_exit: + if (unicode_name) + FREE(unicode_name, M_TEMP); + if (filename) + FREE(filename, M_TEMP); return (error); } else if (name[0] == HFS_ENABLE_JOURNALING) { // make the file system journaled... - struct vnode *vp = p->p_fd->fd_cdir, *jvp; + vnode_t vp = vfs_context_cwd(context); + vnode_t jvp; ExtendedVCB *vcb; struct cat_attr jnl_attr, jinfo_attr; struct cat_fork jnl_fork, jinfo_fork; @@ -1961,7 +2433,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, if (!is_suser()) { return (EPERM); } - if (vp == NULL) + if (vp == NULLVP) return EINVAL; hfsmp = VTOHFS(vp); @@ -1974,7 +2446,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, } if (hfsmp->jnl) { - printf("hfs: volume @ mp 0x%x is already journaled!\n", vnode_mount(vp)); + printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp)); return EAGAIN; } @@ -2001,13 +2473,21 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", (off_t)name[2], (off_t)name[3]); + // + // XXXdbg - note that currently (Sept, 08) hfs_util does not support + // enabling the journal on a separate device so it is safe 
+ // to just copy hfs_devvp here. If hfs_util gets the ability + // to dynamically enable the journal on a separate device then + // we will have to do the same thing as hfs_early_journal_init() + // to locate and open the journal device. + // jvp = hfsmp->hfs_devvp; jnl = journal_create(jvp, (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, (off_t)((unsigned)name[3]), hfsmp->hfs_devvp, - hfsmp->hfs_phys_block_size, + hfsmp->hfs_logical_block_size, 0, 0, hfs_sync_metadata, hfsmp->hfs_mp); @@ -2015,7 +2495,8 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, if (jnl == NULL) { printf("hfs: FAILED to create the journal!\n"); if (jvp && jvp != hfsmp->hfs_devvp) { - VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(jvp); + VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); } jvp = NULL; @@ -2024,6 +2505,16 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_acquire(hfsmp); + /* + * Flush all dirty metadata buffers. 
+ */ + buf_flushdirtyblks(hfsmp->hfs_devvp, MNT_WAIT, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_extents_vp, MNT_WAIT, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, "hfs_sysctl"); + if (hfsmp->hfs_attribute_vp) + buf_flushdirtyblks(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, "hfs_sysctl"); + HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1]; HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask; hfsmp->jvp = jvp; @@ -2035,26 +2526,42 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid; hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid; - vfs_setflags(hfsmp->hfs_mp, (uint64_t)((unsigned int)MNT_JOURNALED)); + vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } return 0; } else if (name[0] == HFS_DISABLE_JOURNALING) { // clear the journaling bit - struct vnode *vp = p->p_fd->fd_cdir; + vnode_t vp = vfs_context_cwd(context); /* Only root can disable journaling */ if (!is_suser()) { return (EPERM); } - if (vp == NULL) + if (vp == NULLVP) return EINVAL; hfsmp = VTOHFS(vp); - printf("hfs: disabling journaling for mount @ 0x%x\n", vnode_mount(vp)); + /* + * Disabling journaling is disallowed on volumes with directory hard links + * because we have not tested the relevant code path. 
+ */ + if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){ + printf("hfs: cannot disable journaling on volumes with directory hardlinks\n"); + return EPERM; + } + + printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp)); hfs_global_exclusive_lock_acquire(hfsmp); @@ -2063,10 +2570,12 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfsmp->jnl = NULL; if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(hfsmp->jvp); + VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); + vnode_put(hfsmp->jvp); } hfsmp->jvp = NULL; - vfs_clearflags(hfsmp->hfs_mp, (uint64_t)((unsigned int)MNT_JOURNALED)); + vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); hfsmp->jnl_start = 0; hfsmp->hfs_jnlinfoblkid = 0; hfsmp->hfs_jnlfileid = 0; @@ -2076,14 +2585,25 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } return 0; } else if (name[0] == HFS_GET_JOURNAL_INFO) { - struct vnode *vp = p->p_fd->fd_cdir; + vnode_t vp = vfs_context_cwd(context); off_t jnl_start, jnl_size; - if (vp == NULL) + if (vp == NULLVP) return EINVAL; + /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! 
*/ + if (proc_is64bit(current_proc())) + return EINVAL; + hfsmp = VTOHFS(vp); if (hfsmp->jnl == NULL) { jnl_start = 0; @@ -2103,47 +2623,97 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, return 0; } else if (name[0] == HFS_SET_PKG_EXTENSIONS) { - return set_package_extensions_table((void *)name[1], name[2], name[3]); + return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]); } else if (name[0] == VFS_CTL_QUERY) { struct sysctl_req *req; - struct vfsidctl vc; - struct user_vfsidctl user_vc; + union union_vfsidctl vc; struct mount *mp; struct vfsquery vq; - boolean_t is_64_bit; - is_64_bit = proc_is64bit(p); req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ - if (is_64_bit) { - error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); - if (error) return (error); - - mp = vfs_getvfs(&user_vc.vc_fsid); - } - else { - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) return (error); - - mp = vfs_getvfs(&vc.vc_fsid); - } + error = SYSCTL_IN(req, &vc, proc_is64bit(p)? 
sizeof(vc.vc64):sizeof(vc.vc32)); + if (error) return (error); + + mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */ if (mp == NULL) return (ENOENT); hfsmp = VFSTOHFS(mp); bzero(&vq, sizeof(vq)); vq.vq_flags = hfsmp->hfs_notification_conditions; return SYSCTL_OUT(req, &vq, sizeof(vq));; - }; - - return (ENOTSUP); -} - - -static int -hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) + } else if (name[0] == HFS_REPLAY_JOURNAL) { + vnode_t devvp = NULL; + int device_fd; + if (namelen != 2) { + return (EINVAL); + } + device_fd = name[1]; + error = file_vnode(device_fd, &devvp); + if (error) { + return error; + } + error = vnode_getwithref(devvp); + if (error) { + file_drop(device_fd); + return error; + } + error = hfs_journal_replay(devvp, context); + file_drop(device_fd); + vnode_put(devvp); + return error; + } + + return (ENOTSUP); +} + +/* + * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support + * the build_path ioctl. We use it to leverage the code below that updates + * the origin list cache if necessary + */ + +int +hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { - return hfs_vget(VFSTOHFS(mp), (cnid_t)ino, vpp, 1); + int error; + int lockflags; + struct hfsmount *hfsmp; + + hfsmp = VFSTOHFS(mp); + + error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1); + if (error) + return (error); + + /* + * ADLs may need to have their origin state updated + * since build_path needs a valid parent. The same is true + * for hardlinked files as well. There isn't a race window here + * in re-acquiring the cnode lock since we aren't pulling any data + * out of the cnode; instead, we're going to the catalog. 
+ */ + if ((VTOC(*vpp)->c_flag & C_HARDLINK) && + (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) { + cnode_t *cp = VTOC(*vpp); + struct cat_desc cdesc; + + if (!hfs_haslinkorigin(cp)) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { + if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && + (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { + hfs_savelinkorigin(cp, cdesc.cd_parentcnid); + } + cat_releasedesc(&cdesc); + } + } + hfs_unlock(cp); + } + return (0); } @@ -2158,26 +2728,27 @@ __private_extern__ int hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) { - struct vnode *vp = NULL; + struct vnode *vp = NULLVP; struct cat_desc cndesc; struct cat_attr cnattr; struct cat_fork cnfork; - struct componentname cn; + u_int32_t linkref = 0; int error; /* Check for cnids that should't be exported. */ - if ((cnid < kHFSFirstUserCatalogNodeID) - && (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) + if ((cnid < kHFSFirstUserCatalogNodeID) && + (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) { return (ENOENT); - - /* Don't export HFS Private Data dir. */ - if (cnid == hfsmp->hfs_privdir_desc.cd_cnid) + } + /* Don't export our private directories. 
*/ + if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { return (ENOENT); - + } /* * Check the hash first */ - vp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, cnid, 0, skiplock); + vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock); if (vp) { *vpp = vp; return(0); @@ -2193,21 +2764,24 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) if (cnid == kHFSRootParentID) { static char hfs_rootname[] = "/"; - cndesc.cd_nameptr = &hfs_rootname[0]; + cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0]; cndesc.cd_namelen = 1; cndesc.cd_parentcnid = kHFSRootParentID; cndesc.cd_cnid = kHFSRootFolderID; cndesc.cd_flags = CD_ISDIR; cnattr.ca_fileid = kHFSRootFolderID; - cnattr.ca_nlink = 2; + cnattr.ca_linkcount = 1; cnattr.ca_entries = 1; + cnattr.ca_dircount = 1; cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO); } else { int lockflags; + cnid_t pid; + const char *nameptr; lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_idlookup(hfsmp, cnid, &cndesc, &cnattr, &cnfork); + error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork); hfs_systemfile_unlock(hfsmp, lockflags); if (error) { @@ -2215,43 +2789,91 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) return (error); } - /* Hide open files that have been deleted */ - if ((hfsmp->hfs_privdir_desc.cd_cnid != 0) && - (cndesc.cd_parentcnid == hfsmp->hfs_privdir_desc.cd_cnid)) { - // XXXdbg - if this is a hardlink, we could call - // hfs_chash_snoop() to see if there is - // already a cnode and vnode present for - // this fileid. however I'd rather not - // risk it at this point in Tiger. - cat_releasedesc(&cndesc); - error = ENOENT; + /* + * Check for a raw hardlink inode and save its linkref. 
+ */ + pid = cndesc.cd_parentcnid; + nameptr = (const char *)cndesc.cd_nameptr; + + if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) { + linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10); + + } else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && + (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) { + linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10); + + } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) { *vpp = NULL; - return (error); + cat_releasedesc(&cndesc); + return (ENOENT); /* open unlinked file */ } } /* - * Supply hfs_getnewvnode with a component name. - */ - MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); - cn.cn_nameiop = LOOKUP; - cn.cn_flags = ISLASTCN | HASBUF; - cn.cn_context = NULL; - cn.cn_pnlen = MAXPATHLEN; - cn.cn_nameptr = cn.cn_pnbuf; - cn.cn_namelen = cndesc.cd_namelen; - cn.cn_hash = 0; - cn.cn_consume = 0; - bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1); - - /* XXX should we supply the parent as well... ? */ - error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp); - FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); + * Finish initializing cnode descriptor for hardlinks. + * + * We need a valid name and parent for reverse lookups. + */ + if (linkref) { + cnid_t nextlinkid; + cnid_t prevlinkid; + struct cat_desc linkdesc; + int lockflags; + + cnattr.ca_linkref = linkref; + + /* + * Pick up the first link in the chain and get a descriptor for it. + * This allows blind volfs paths to work for hardlinks. 
+ */ + if ((hfs_lookuplink(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) && + (nextlinkid != 0)) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, nextlinkid, &linkdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { + cat_releasedesc(&cndesc); + bcopy(&linkdesc, &cndesc, sizeof(linkdesc)); + } + } + } + + if (linkref) { + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cnfork, &vp); + if (error == 0) { + VTOC(vp)->c_flag |= C_HARDLINK; + vnode_setmultipath(vp); + } + } else { + struct componentname cn; + + /* Supply hfs_getnewvnode with a component name. */ + MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + cn.cn_nameiop = LOOKUP; + cn.cn_flags = ISLASTCN | HASBUF; + cn.cn_context = NULL; + cn.cn_pnlen = MAXPATHLEN; + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_namelen = cndesc.cd_namelen; + cn.cn_hash = 0; + cn.cn_consume = 0; + bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1); + + error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp); + if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) { + hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid); + } + FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); + } cat_releasedesc(&cndesc); + *vpp = vp; - if (vp && skiplock) + if (vp && skiplock) { hfs_unlock(VTOC(vp)); + } return (error); } @@ -2260,13 +2882,19 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) * Flush out all the files in a filesystem. */ static int +#if QUOTA hfs_flushfiles(struct mount *mp, int flags, struct proc *p) +#else +hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) +#endif /* QUOTA */ { struct hfsmount *hfsmp; struct vnode *skipvp = NULLVP; + int error; +#if QUOTA int quotafilecnt; int i; - int error; +#endif hfsmp = VFSTOHFS(mp); @@ -2286,7 +2914,7 @@ hfs_flushfiles(struct mount *mp, int flags, struct proc *p) } /* Obtain the root vnode so we can skip over it. 
*/ - skipvp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, kHFSRootFolderID, 0, 0); + skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0); } #endif /* QUOTA */ @@ -2336,7 +2964,7 @@ hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding) #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */ #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */ - UInt32 index; + u_int32_t index; switch (encoding) { case kTextEncodingMacUkrainian: @@ -2350,10 +2978,10 @@ hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding) break; } - if (index < 64) { + if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) { HFS_MOUNT_LOCK(hfsmp, TRUE) hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index); - hfsmp->vcbFlags |= 0xFF00; + MarkVCBDirty(hfsmp); HFS_MOUNT_UNLOCK(hfsmp, TRUE); } } @@ -2373,7 +3001,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) lck_mtx_lock(&hfsmp->hfs_mutex); - hfsmp->vcbFlags |= 0xFF00; + MarkVCBDirty(hfsmp); hfsmp->hfs_mtime = tv.tv_sec; switch (op) { @@ -2426,7 +3054,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) int sectorsize; ByteCount namelen; - sectorsize = hfsmp->hfs_phys_block_size; + sectorsize = hfsmp->hfs_logical_block_size; retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp); if (retval) { if (bp) @@ -2447,7 +3075,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID); mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks); - namelen = strlen(vcb->vcbVN); + namelen = strlen((char *)vcb->vcbVN); retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN); /* Retry with MacRoman in case that's how it was exported. 
*/ if (retval) @@ -2521,65 +3149,95 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); struct filefork *fp; - HFSPlusVolumeHeader *volumeHeader; + HFSPlusVolumeHeader *volumeHeader, *altVH; int retval; - struct buf *bp; + struct buf *bp, *alt_bp; int i; - int sectorsize; daddr64_t priIDSector; - int critical = 0; + int critical; u_int16_t signature; u_int16_t hfsversion; if (hfsmp->hfs_flags & HFS_READ_ONLY) { return(0); } - if (vcb->vcbSigWord == kHFSSigWord) + if (hfsmp->hfs_flags & HFS_STANDARD) { return hfs_flushMDB(hfsmp, waitfor, altflush); - - if (altflush) - critical = 1; - sectorsize = hfsmp->hfs_phys_block_size; - priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_PRI_SECTOR(sectorsize)); + } + critical = altflush; + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); if (hfs_start_transaction(hfsmp) != 0) { return EINVAL; } - retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); - if (retval) { - if (bp) - buf_brelse(bp); - - hfs_end_transaction(hfsmp); - - printf("HFS: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); - return (retval); - } + bp = NULL; + alt_bp = NULL; - if (hfsmp->jnl) { - journal_modify_block_start(hfsmp->jnl, bp); + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (retval) { + printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); + goto err_exit; } - volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize)); + volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); /* - * Sanity check what we just read. + * Sanity check what we just read. If it's bad, try the alternate + * instead. 
 */ signature = SWAP_BE16 (volumeHeader->signature); hfsversion = SWAP_BE16 (volumeHeader->version); if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) { -#if 1 - panic("HFS: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d", + printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n", vcb->vcbVN, signature, hfsversion, - SWAP_BE32 (volumeHeader->blockSize)); -#endif - printf("HFS: corrupt VH blk (%s)\n", vcb->vcbVN); - buf_brelse(bp); - return (EIO); + SWAP_BE32 (volumeHeader->blockSize), + hfsmp->hfs_alt_id_sector ? "; trying alternate" : ""); + hfs_mark_volume_inconsistent(hfsmp); + + if (hfsmp->hfs_alt_id_sector) { + retval = buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp); + if (retval) { + printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN); + goto err_exit; + } + + altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)); + signature = SWAP_BE16(altVH->signature); + hfsversion = SWAP_BE16(altVH->version); + + if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || + (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || + (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) { + printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n", + vcb->vcbVN, signature, hfsversion, + SWAP_BE32(altVH->blockSize)); + retval = EIO; + goto err_exit; + } + + /* The alternate is plausible, so use it. */ + bcopy(altVH, volumeHeader, kMDBSize); + buf_brelse(alt_bp); + alt_bp = NULL; + } else { + /* No alternate VH, nothing more we can do. 
*/ + retval = EIO; + goto err_exit; + } + } + + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp); } /* @@ -2591,15 +3249,16 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) struct buf *bp2; HFSMasterDirectoryBlock *mdb; - retval = (int)buf_meta_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), - sectorsize, NOCRED, &bp2); + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp2); if (retval) { if (bp2) buf_brelse(bp2); retval = 0; } else { mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) + - HFS_PRI_OFFSET(sectorsize)); + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate ) { @@ -2610,7 +3269,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */ if (hfsmp->jnl) { - journal_modify_block_end(hfsmp->jnl, bp2); + journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL); } else { (void) VNOP_BWRITE(bp2); /* write out the changes */ } @@ -2622,9 +3281,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) } } - if (1 /* hfsmp->jnl == 0 */) { - lck_mtx_lock(&hfsmp->hfs_mutex); - } + lck_mtx_lock(&hfsmp->hfs_mutex); /* Note: only update the lower 16 bits worth of attributes */ volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb); @@ -2653,6 +3310,13 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) critical = 1; } + /* + * System files are only dirty when altflush is set. 
+ */ + if (altflush == 0) { + goto done; + } + /* Sync Extents over-flow file meta data */ fp = VTOF(vcb->extentsRefNum); if (FTOC(fp)->c_flag & C_MODIFIED) { @@ -2713,25 +3377,42 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); } - vcb->vcbFlags &= 0x00FF; - - if (1 /* hfsmp->jnl == 0 */) { - lck_mtx_unlock(&hfsmp->hfs_mutex); + /* Sync Startup file meta data */ + if (hfsmp->hfs_startup_vp) { + fp = VTOF(hfsmp->hfs_startup_vp); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->startupFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->startupFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + } } +done: + MarkVCBClean(hfsmp); + lck_mtx_unlock(&hfsmp->hfs_mutex); + /* If requested, flush out the alternate volume header */ if (altflush && hfsmp->hfs_alt_id_sector) { - struct buf *alt_bp = NULL; - - if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, alt_bp); } - bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize); + bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), + kMDBSize); if (hfsmp->jnl) { - journal_modify_block_end(hfsmp->jnl, alt_bp); + journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL); } else { (void) VNOP_BWRITE(alt_bp); } @@ -2740,7 +3421,7 @@ 
hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) } if (hfsmp->jnl) { - journal_modify_block_end(hfsmp->jnl, bp); + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); } else { if (waitfor != MNT_WAIT) buf_bawrite(bp); @@ -2756,6 +3437,14 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) hfs_end_transaction(hfsmp); return (retval); + +err_exit: + if (alt_bp) + buf_brelse(alt_bp); + if (bp) + buf_brelse(bp); + hfs_end_transaction(hfsmp); + return retval; } @@ -2780,11 +3469,15 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) u_int32_t addblks; u_int64_t sectorcnt; u_int32_t sectorsize; + u_int32_t phys_sectorsize; daddr64_t prev_alt_sector; daddr_t bitmapblks; - int lockflags; + int lockflags = 0; int error; - + int64_t oldBitmapSize; + Boolean usedExtendFileC = false; + int transaction_begun = 0; + devvp = hfsmp->hfs_devvp; vcb = HFSTOVCB(hfsmp); @@ -2823,7 +3516,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)§orsize, 0, context)) { return (ENXIO); } - if (sectorsize != hfsmp->hfs_phys_block_size) { + if (sectorsize != hfsmp->hfs_logical_block_size) { return (ENXIO); } if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)§orcnt, 0, context)) { @@ -2833,12 +3526,20 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) printf("hfs_extendfs: not enough space on device\n"); return (ENOSPC); } + error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context); + if (error) { + if ((error != ENOTSUP) && (error != ENOTTY)) { + return (ENXIO); + } + /* If ioctl is not supported, force physical and logical sector size to be same */ + phys_sectorsize = sectorsize; + } oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; /* * Validate new size. 
*/ - if ((newsize <= oldsize) || (newsize % sectorsize)) { + if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) { printf("hfs_extendfs: invalid size\n"); return (EINVAL); } @@ -2849,14 +3550,33 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) addblks = newblkcnt - vcb->totalBlocks; printf("hfs_extendfs: growing %s by %d blocks\n", vcb->vcbVN, addblks); + + HFS_MOUNT_LOCK(hfsmp, TRUE); + if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + error = EALREADY; + goto out; + } + hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + + /* Invalidate the current free extent cache */ + invalidate_free_extent_cache(hfsmp); + /* * Enclose changes inside a transaction. */ if (hfs_start_transaction(hfsmp) != 0) { - return (EINVAL); + error = EINVAL; + goto out; } + transaction_begun = 1; - lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + /* + * Note: we take the attributes lock in case we have an attribute data vnode + * which needs to change size. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); vp = vcb->allocationsRefNum; fp = VTOF(vp); bcopy(&fp->ff_data, &forkdata, sizeof(forkdata)); @@ -2864,7 +3584,8 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) /* * Calculate additional space required (if any) by allocation bitmap. */ - bitmapblks = roundup(newblkcnt / 8, vcb->vcbVBMIOSize) / vcb->blockSize; + oldBitmapSize = fp->ff_size; + bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize; if (bitmapblks > (daddr_t)fp->ff_blocks) bitmapblks -= fp->ff_blocks; else @@ -2873,26 +3594,59 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) if (bitmapblks > 0) { daddr64_t blkno; daddr_t blkcnt; + off_t bytesAdded; /* - * Add a new extent to the allocation bitmap file. 
+ * Get the bitmap's current size (in allocation blocks) so we know + * where to start zero filling once the new space is added. We've + * got to do this before the bitmap is grown. */ - error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks); - if (error) { - printf("hfs_extendfs: error %d adding extents\n", error); - goto out; - } - blkcnt = bitmapblks; blkno = (daddr64_t)fp->ff_blocks; - fp->ff_blocks += bitmapblks; + + /* + * Try to grow the allocation file in the normal way, using allocation + * blocks already existing in the file system. This way, we might be + * able to grow the bitmap contiguously, or at least in the metadata + * zone. + */ + error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0, + kEFAllMask | kEFNoClumpMask | kEFReserveMask | kEFMetadataMask, + &bytesAdded); + + if (error == 0) { + usedExtendFileC = true; + } else { + /* + * If the above allocation failed, fall back to allocating the new + * extent of the bitmap from the space we're going to add. Since those + * blocks don't yet belong to the file system, we have to update the + * extent list directly, and manually adjust the file size. + */ + bytesAdded = 0; + error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks); + if (error) { + printf("hfs_extendfs: error %d adding extents\n", error); + goto out; + } + fp->ff_blocks += bitmapblks; + VTOC(vp)->c_blocks = fp->ff_blocks; + VTOC(vp)->c_flag |= C_MODIFIED; + } + + /* + * Update the allocation file's size to include the newly allocated + * blocks. Note that ExtendFileC doesn't do this, which is why this + * statement is outside the above "if" statement. + */ fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; - VTOC(vp)->c_blocks = fp->ff_blocks; + /* * Zero out the new bitmap blocks. 
*/ { bp = NULL; + blkcnt = bitmapblks; while (blkcnt > 0) { error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); if (error) { @@ -2916,11 +3670,20 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } /* * Mark the new bitmap space as allocated. + * + * Note that ExtendFileC will have marked any blocks it allocated, so + * this is only needed if we used AddFileExtent. Also note that this + * has to come *after* the zero filling of new blocks in the case where + * we used AddFileExtent (since the part of the bitmap we're touching + * is in those newly allocated blocks). */ - error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks); - if (error) { - printf("hfs_extendfs: error %d setting bitmap\n", error); - goto out; + if (!usedExtendFileC) { + error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks); + if (error) { + printf("hfs_extendfs: error %d setting bitmap\n", error); + goto out; + } + vcb->freeBlocks -= bitmapblks; } } /* @@ -2944,14 +3707,14 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) /* * Adjust file system variables for new space. */ - prev_phys_block_count = hfsmp->hfs_phys_block_count; + prev_phys_block_count = hfsmp->hfs_logical_block_count; prev_alt_sector = hfsmp->hfs_alt_id_sector; vcb->totalBlocks += addblks; - vcb->freeBlocks += addblks - bitmapblks; - hfsmp->hfs_phys_block_count = newsize / sectorsize; + vcb->freeBlocks += addblks; + hfsmp->hfs_logical_block_count = newsize / sectorsize; hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) + - HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count); + HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count); MarkVCBDirty(vcb); error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); if (error) { @@ -2959,10 +3722,21 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) /* * Restore to old state. 
*/ - fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; + if (usedExtendFileC) { + (void) TruncateFileC(vcb, fp, oldBitmapSize, false); + } else { + fp->ff_blocks -= bitmapblks; + fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; + /* + * No need to mark the excess blocks free since those bitmap blocks + * are no longer part of the bitmap. But we do need to undo the + * effect of the "vcb->freeBlocks -= bitmapblks" above. + */ + vcb->freeBlocks += bitmapblks; + } vcb->totalBlocks -= addblks; - vcb->freeBlocks -= addblks - bitmapblks; - hfsmp->hfs_phys_block_count = prev_phys_block_count; + vcb->freeBlocks -= addblks; + hfsmp->hfs_logical_block_count = prev_phys_block_count; hfsmp->hfs_alt_id_sector = prev_alt_sector; MarkVCBDirty(vcb); if (vcb->blockSize == 512) @@ -2976,17 +3750,44 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) */ bp = NULL; if (prev_alt_sector) { - if (buf_meta_bread(hfsmp->hfs_devvp, prev_alt_sector, sectorsize, - NOCRED, &bp) == 0) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) { journal_modify_block_start(hfsmp->jnl, bp); - bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize); + bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize); - journal_modify_block_end(hfsmp->jnl, bp); + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); } else if (bp) { buf_brelse(bp); } } + + /* + * Update the metadata zone size based on current volume size + */ + hfs_metadatazone_init(hfsmp); + + /* + * Adjust the size of hfsmp->hfs_attrdata_vp + */ + if (hfsmp->hfs_attrdata_vp) { + struct cnode *attr_cp; + struct filefork *attr_fp; + + if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { + attr_cp = VTOC(hfsmp->hfs_attrdata_vp); + attr_fp = VTOF(hfsmp->hfs_attrdata_vp); + + attr_cp->c_blocks = newblkcnt; + attr_fp->ff_blocks = newblkcnt; + 
attr_fp->ff_extents[0].blockCount = newblkcnt; + attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; + ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size); + vnode_put(hfsmp->hfs_attrdata_vp); + } + } + out: if (error && fp) { /* Restore allocation fork. */ @@ -2994,8 +3795,21 @@ out: VTOC(vp)->c_blocks = fp->ff_blocks; } - hfs_systemfile_unlock(hfsmp, lockflags); - hfs_end_transaction(hfsmp); + /* + Regardless of whether or not the totalblocks actually increased, + we should reset the allocLimit field. If it changed, it will + get updated; if not, it will remain the same. + */ + HFS_MOUNT_LOCK(hfsmp, TRUE); + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; + hfsmp->allocLimit = vcb->totalBlocks; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + } return (error); } @@ -3007,31 +3821,32 @@ out: */ __private_extern__ int -hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t context) +hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) { - struct vnode* rvp = NULL; struct buf *bp = NULL; u_int64_t oldsize; u_int32_t newblkcnt; - u_int32_t reclaimblks; + u_int32_t reclaimblks = 0; int lockflags = 0; int transaction_begun = 0; + Boolean updateFreeBlocks = false; int error; - /* - * Grab the root vnode to serialize with another hfs_truncatefs call. - */ - error = hfs_vget(hfsmp, kHFSRootFolderID, &rvp, 0); - if (error) { - return (error); + HFS_MOUNT_LOCK(hfsmp, TRUE); + if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + return (EALREADY); } + hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; + hfsmp->hfs_resize_filesmoved = 0; + hfsmp->hfs_resize_totalfiles = 0; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + /* - * - HFS Plus file systems only. - * - Journaling must be enabled. + * - Journaled HFS Plus volumes only. * - No embedded volumes. 
*/ - if ((hfsmp->hfs_flags & HFS_STANDARD) || - (hfsmp->jnl == NULL) || + if ((hfsmp->jnl == NULL) || (hfsmp->hfsPlusIOPosOffset != 0)) { error = EPERM; goto out; @@ -3040,22 +3855,33 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t newblkcnt = newsize / hfsmp->blockSize; reclaimblks = hfsmp->totalBlocks - newblkcnt; + if (hfs_resize_debug) { + printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1)); + printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks); + } + /* Make sure new size is valid. */ if ((newsize < HFS_MIN_SIZE) || (newsize >= oldsize) || - (newsize % hfsmp->hfs_phys_block_size)) { + (newsize % hfsmp->hfs_logical_block_size) || + (newsize % hfsmp->hfs_physical_block_size)) { + printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); error = EINVAL; goto out; } - /* Make sure there's enough space to work with. */ - if (reclaimblks > (hfsmp->freeBlocks / 4)) { + /* Make sure that the file system has enough free blocks reclaim */ + if (reclaimblks >= hfs_freeblks(hfsmp, 1)) { + printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1)); error = ENOSPC; goto out; } - - printf("hfs_truncatefs: shrinking %s by %d blocks out of %d\n", - hfsmp->vcbVN, reclaimblks, hfsmp->totalBlocks); - + + /* Invalidate the current free extent cache */ + invalidate_free_extent_cache(hfsmp); + + /* Start with a clean journal. */ + hfs_journal_flush(hfsmp); + if (hfs_start_transaction(hfsmp) != 0) { error = EINVAL; goto out; @@ -3063,9 +3889,44 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t transaction_begun = 1; /* - * Look for files that have blocks beyond newblkcnt. + * Prevent new allocations from using the part we're trying to truncate. 
+ * + * NOTE: allocLimit is set to the allocation block number where the new + * alternate volume header will be. That way there will be no files to + * interfere with allocating the new alternate volume header, and no files + * in the allocation blocks beyond (i.e. the blocks we're trying to + * truncate away. + */ + HFS_MOUNT_LOCK(hfsmp, TRUE); + if (hfsmp->blockSize == 512) + hfsmp->allocLimit = newblkcnt - 2; + else + hfsmp->allocLimit = newblkcnt - 1; + /* + * Update the volume free block count to reflect the total number + * of free blocks that will exist after a successful resize. + * Relocation of extents will result in no net change in the total + * free space on the disk. Therefore the code that allocates + * space for new extent and deallocates the old extent explicitly + * prevents updating the volume free block count. It will also + * prevent false disk full error when the number of blocks in + * an extent being relocated is more than the free blocks that + * will exist after the volume is resized. + */ + hfsmp->freeBlocks -= reclaimblks; + updateFreeBlocks = true; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + + /* + * Update the metadata zone size, and, if required, disable it + */ + hfs_metadatazone_init(hfsmp); + + /* + * Look for files that have blocks at or beyond the location of the + * new alternate volume header */ - if (hfs_isallocated(hfsmp, newblkcnt, reclaimblks - 1)) { + if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) { /* * hfs_reclaimspace will use separate transactions when * relocating files (so we don't overwhelm the journal). @@ -3074,8 +3935,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t transaction_begun = 0; /* Attempt to reclaim some space. 
*/ - if (hfs_reclaimspace(hfsmp, newblkcnt) != 0) { - printf("hfs_truncatefs: couldn't reclaim space on %s\n", hfsmp->vcbVN); + error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context); + if (error != 0) { + printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error); error = ENOSPC; goto out; } @@ -3086,82 +3948,901 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t transaction_begun = 1; /* Check if we're clear now. */ - if (hfs_isallocated(hfsmp, newblkcnt, reclaimblks - 1)) { - printf("hfs_truncatefs: didn't reclaim enough space on %s\n", hfsmp->vcbVN); - error = ENOSPC; + error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks); + if (error != 0) { + printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error); + error = EAGAIN; /* tell client to try again */ goto out; } } - lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* + * Note: we take the attributes lock in case we have an attribute data vnode + * which needs to change size. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); /* * Mark the old alternate volume header as free. * We don't bother shrinking allocation bitmap file. */ - if (hfsmp->blockSize == 512) + if (hfsmp->blockSize == 512) (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2); else (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1); /* - * Allocate last block for alternate volume header. + * Allocate last 1KB for alternate volume header. */ - if (hfsmp->blockSize == 512) - error = BlockMarkAllocated(hfsmp, newblkcnt - 2, 2); - else - error = BlockMarkAllocated(hfsmp, newblkcnt - 1, 1); - + error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 
2 : 1); if (error) { + printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error); goto out; } /* * Invalidate the existing alternate volume header. + * + * Don't include this in a transaction (don't call journal_modify_block) + * since this block will be outside of the truncated file system! */ if (hfsmp->hfs_alt_id_sector) { - if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, - hfsmp->hfs_phys_block_size, NOCRED, &bp) == 0) { - journal_modify_block_start(hfsmp->jnl, bp); - - bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_phys_block_size)), kMDBSize); - - journal_modify_block_end(hfsmp->jnl, bp); - } else if (bp) { - buf_brelse(bp); + error = buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (error == 0) { + bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize); + (void) VNOP_BWRITE(bp); + } else { + if (bp) { + buf_brelse(bp); + } } bp = NULL; } + /* Log successful shrinking. */ + printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n", + hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks); + /* * Adjust file system variables and flush them to disk. 
*/ - hfsmp->freeBlocks -= hfsmp->totalBlocks - newblkcnt; hfsmp->totalBlocks = newblkcnt; - hfsmp->hfs_phys_block_count = newsize / hfsmp->hfs_phys_block_size; - hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size, hfsmp->hfs_phys_block_count); + hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size; + hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); MarkVCBDirty(hfsmp); error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); if (error) panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); + + /* + * Adjust the size of hfsmp->hfs_attrdata_vp + */ + if (hfsmp->hfs_attrdata_vp) { + struct cnode *cp; + struct filefork *fp; + + if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { + cp = VTOC(hfsmp->hfs_attrdata_vp); + fp = VTOF(hfsmp->hfs_attrdata_vp); + + cp->c_blocks = newblkcnt; + fp->ff_blocks = newblkcnt; + fp->ff_extents[0].blockCount = newblkcnt; + fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; + ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size); + vnode_put(hfsmp->hfs_attrdata_vp); + } + } + out: + lck_mtx_lock(&hfsmp->hfs_mutex); + if (error && (updateFreeBlocks == true)) + hfsmp->freeBlocks += reclaimblks; + hfsmp->allocLimit = hfsmp->totalBlocks; + if (hfsmp->nextAllocation >= hfsmp->allocLimit) + hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + /* On error, reset the metadata zone for original volume size */ + if (error && (updateFreeBlocks == true)) { + hfs_metadatazone_init(hfsmp); + } + if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); } if (transaction_begun) { hfs_end_transaction(hfsmp); + hfs_journal_flush(hfsmp); + /* Just to be sure, sync all data to the disk */ + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); } - if (rvp) { - hfs_unlock(VTOC(rvp)); - vnode_put(rvp); - } + return (error); } + +/* + * 
Invalidate the physical block numbers associated with buffer cache blocks + * in the given extent of the given vnode. + */ +struct hfs_inval_blk_no { + daddr64_t sectorStart; + daddr64_t sectorCount; +}; +static int +hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in) +{ + daddr64_t blkno; + struct hfs_inval_blk_no *args; + + blkno = buf_blkno(bp); + args = args_in; + + if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount) + buf_setblkno(bp, buf_lblkno(bp)); + + return BUF_RETURNED; +} +static void +hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount) +{ + struct hfs_inval_blk_no args; + args.sectorStart = sectorStart; + args.sectorCount = sectorCount; + + buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args); +} + + +/* + * Copy the contents of an extent to a new location. Also invalidates the + * physical block number of any buffer cache block in the copied extent + * (so that if the block is written, it will go through VNOP_BLOCKMAP to + * determine the new physical block number). + */ +static int +hfs_copy_extent( + struct hfsmount *hfsmp, + struct vnode *vp, /* The file whose extent is being copied. */ + u_int32_t oldStart, /* The start of the source extent. */ + u_int32_t newStart, /* The start of the destination extent. */ + u_int32_t blockCount, /* The number of allocation blocks to copy. */ + vfs_context_t context) +{ + int err = 0; + size_t bufferSize; + void *buffer = NULL; + struct vfsioattr ioattr; + buf_t bp = NULL; + off_t resid; + size_t ioSize; + u_int32_t ioSizeSectors; /* Device sectors in this I/O */ + daddr64_t srcSector, destSector; + u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size; + + /* + * Sanity check that we have locked the vnode of the file we're copying. 
+ * + * But since hfs_systemfile_lock() doesn't actually take the lock on + * the allocation file if a journal is active, ignore the check if the + * file being copied is the allocation file. + */ + struct cnode *cp = VTOC(vp); + if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread()) + panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp); + + /* + * Determine the I/O size to use + * + * NOTE: Many external drives will result in an ioSize of 128KB. + * TODO: Should we use a larger buffer, doing several consecutive + * reads, then several consecutive writes? + */ + vfs_ioattr(hfsmp->hfs_mp, &ioattr); + bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); + if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize)) + return ENOMEM; + + /* Get a buffer for doing the I/O */ + bp = buf_alloc(hfsmp->hfs_devvp); + buf_setdataptr(bp, (uintptr_t)buffer); + + resid = (off_t) blockCount * (off_t) hfsmp->blockSize; + srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; + destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; + while (resid > 0) { + ioSize = MIN(bufferSize, (size_t) resid); + ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size; + + /* Prepare the buffer for reading */ + buf_reset(bp, B_READ); + buf_setsize(bp, ioSize); + buf_setcount(bp, ioSize); + buf_setblkno(bp, srcSector); + buf_setlblkno(bp, srcSector); + + /* Do the read */ + err = VNOP_STRATEGY(bp); + if (!err) + err = buf_biowait(bp); + if (err) { + printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err); + break; + } + + /* Prepare the buffer for writing */ + buf_reset(bp, B_WRITE); + buf_setsize(bp, ioSize); + buf_setcount(bp, ioSize); + buf_setblkno(bp, destSector); + buf_setlblkno(bp, destSector); + if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl)) + buf_markfua(bp); + + /* Do the write */ + vnode_startwrite(hfsmp->hfs_devvp); + err = VNOP_STRATEGY(bp); + if (!err) + err = 
buf_biowait(bp); + if (err) { + printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err); + break; + } + + resid -= ioSize; + srcSector += ioSizeSectors; + destSector += ioSizeSectors; + } + if (bp) + buf_free(bp); + if (buffer) + kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize); + + /* Make sure all writes have been flushed to disk. */ + if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) { + err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + if (err) { + printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err); + err = 0; /* Don't fail the copy. */ + } + } + + if (!err) + hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock); + + return err; +} + + +static int +hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state) +{ + bcopy(state, record, sizeof(HFSPlusExtentRecord)); + return 0; +} + +/* + * Reclaim space at the end of a volume, used by a given file. + * + * This routine attempts to move any extent which contains allocation blocks + * at or after "startblk." A separate transaction is used to do the move. + * The contents of any moved extents are read and written via the volume's + * device vnode -- NOT via "vp." During the move, moved blocks which are part + * of a transaction have their physical block numbers invalidated so they will + * eventually be written to their new locations. + * + * Inputs: + * hfsmp The volume being resized. + * startblk Blocks >= this allocation block need to be moved. + * locks Which locks need to be taken for the given system file. + * vp The vnode for the system file. + * + * The caller of this function, hfs_reclaimspace(), grabs cnode lock + * for non-system files before calling this function. + * + * Outputs: + * blks_moved Total number of allocation blocks moved by this routine. 
+ */ +static int +hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, u_int32_t *blks_moved, vfs_context_t context) +{ + int error; + int lockflags; + int i; + u_long datablks; + u_long end_block; + u_int32_t oldStartBlock; + u_int32_t newStartBlock; + u_int32_t oldBlockCount; + u_int32_t newBlockCount; + struct filefork *fp; + struct cnode *cp; + int is_sysfile; + int took_truncate_lock = 0; + struct BTreeIterator *iterator = NULL; + u_int8_t forktype; + u_int32_t fileID; + u_int32_t alloc_flags; + + /* If there is no vnode for this file, then there's nothing to do. */ + if (vp == NULL) + return 0; + + cp = VTOC(vp); + fileID = cp->c_cnid; + is_sysfile = vnode_issystem(vp); + forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0; + + /* Flush all the buffer cache blocks and cluster pages associated with + * this vnode. + * + * If the current vnode is a system vnode, all the buffer cache blocks + * associated with it should already be sync'ed to the disk as part of + * journal flush in hfs_truncatefs(). Normally there should not be + * buffer cache blocks for regular files, but for objects like symlinks, + * we can have buffer cache blocks associated with the vnode. Therefore + * we call buf_flushdirtyblks() always. Resource fork data for directory + * hard links are directly written using buffer cache for device vnode, + * which should also be sync'ed as part of journal flush in hfs_truncatefs(). + * + * Flushing cluster pages should be the normal case for regular files, + * and really should not do anything for system files. But just to be + * sure that all blocks associated with this vnode is sync'ed to the + * disk, we call both buffer cache and cluster layer functions. + */ + buf_flushdirtyblks(vp, MNT_NOWAIT, 0, "hfs_reclaim_file"); + + if (!is_sysfile) { + /* The caller grabs cnode lock for non-system files only, therefore + * we unlock only non-system files before calling cluster layer. 
+ */ + hfs_unlock(cp); + hfs_lock_truncate(cp, TRUE); + took_truncate_lock = 1; + } + (void) cluster_push(vp, 0); + if (!is_sysfile) { + error = hfs_lock(cp, HFS_FORCE_LOCK); + if (error) { + hfs_unlock_truncate(cp, TRUE); + return error; + } + + /* If the file no longer exists, nothing left to do */ + if (cp->c_flag & C_NOEXISTS) { + hfs_unlock_truncate(cp, TRUE); + return 0; + } + } + + /* Wait for any in-progress writes to this vnode to complete, so that we'll + * be copying consistent bits. (Otherwise, it's possible that an async + * write will complete to the old extent after we read from it. That + * could lead to corruption.) + */ + error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file"); + if (error) { + printf("hfs_reclaim_file: Error %d from vnode_waitforwrites\n", error); + return error; + } + + if (hfs_resize_debug) { + printf("hfs_reclaim_file: Start relocating %sfork for fileid=%u name=%.*s\n", (forktype ? "rsrc" : "data"), fileID, cp->c_desc.cd_namelen, cp->c_desc.cd_nameptr); + } + + /* We always need the allocation bitmap and extents B-tree */ + locks |= SFL_BITMAP | SFL_EXTENTS; + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_file: hfs_start_transaction returned %d\n", error); + if (took_truncate_lock) { + hfs_unlock_truncate(cp, TRUE); + } + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, locks, HFS_EXCLUSIVE_LOCK); + fp = VTOF(vp); + datablks = 0; + *blks_moved = 0; + + /* Relocate non-overflow extents */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (fp->ff_extents[i].blockCount == 0) + break; + oldStartBlock = fp->ff_extents[i].startBlock; + oldBlockCount = fp->ff_extents[i].blockCount; + datablks += oldBlockCount; + end_block = oldStartBlock + oldBlockCount; + /* Check if the file overlaps the target space */ + if (end_block > startblk) { + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; + } + error = 
BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) { + /* Try allocating again using the metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + } + if (error) { + printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); + goto fail; + } else { + if (hfs_resize_debug) { + printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount); + } + } + } + + /* Copy data from old location to new location */ + error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context); + if (error) { + printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); + if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) { + hfs_mark_volume_inconsistent(hfsmp); + } + goto fail; + } + fp->ff_extents[i].startBlock = newStartBlock; + cp->c_flag |= C_MODIFIED; + *blks_moved += newBlockCount; + + /* Deallocate the old extent */ + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; + } + + /* If this is a system file, sync the volume header on disk */ + if (is_sysfile) { + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) { + printf("hfs_reclaim_file: hfs_flushvolumeheader returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; + } + } + + if (hfs_resize_debug) { + printf ("hfs_reclaim_file: Relocated %u:(%u,%u) to %u:(%u,%u)\n", i, 
oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); + } + } + } + + /* Relocate overflow extents (if any) */ + if (i == kHFSPlusExtentDensity && fp->ff_blocks > datablks) { + struct FSBufferDescriptor btdata; + HFSPlusExtentRecord record; + HFSPlusExtentKey *key; + FCB *fcb; + int overflow_count = 0; + + if (kmem_alloc(kernel_map, (vm_offset_t*) &iterator, sizeof(*iterator))) { + printf("hfs_reclaim_file: kmem_alloc failed!\n"); + error = ENOMEM; + goto fail; + } + + bzero(iterator, sizeof(*iterator)); + key = (HFSPlusExtentKey *) &iterator->key; + key->keyLength = kHFSPlusExtentKeyMaximumLength; + key->forkType = forktype; + key->fileID = fileID; + key->startBlock = datablks; + + btdata.bufferAddress = &record; + btdata.itemSize = sizeof(record); + btdata.itemCount = 1; + + fcb = VTOF(hfsmp->hfs_extents_vp); + + error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + while (error == 0) { + /* Stop when we encounter a different file or fork. */ + if ((key->fileID != fileID) || + (key->forkType != forktype)) { + break; + } + + /* Just track the overflow extent record number for debugging... */ + if (hfs_resize_debug) { + overflow_count++; + } + + /* + * Check if the file overlaps target space. 
+ */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (record[i].blockCount == 0) { + goto fail; + } + oldStartBlock = record[i].startBlock; + oldBlockCount = record[i].blockCount; + end_block = oldStartBlock + oldBlockCount; + if (end_block > startblk) { + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; + } + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) { + /* Try allocating again using the metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + } + if (error) { + printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); + goto fail; + } else { + if (hfs_resize_debug) { + printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount); + } + } + } + error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context); + if (error) { + printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) { + hfs_mark_volume_inconsistent(hfsmp); + } + goto fail; + } + record[i].startBlock = newStartBlock; + cp->c_flag |= C_MODIFIED; + *blks_moved += newBlockCount; + + /* + * NOTE: To support relocating overflow extents of the + * allocation file, we must update the BTree record BEFORE + * deallocating the old extent so that BlockDeallocate will + * use the extent's new location to calculate physical block + * numbers. 
(This is for the case where the old extent's + * bitmap bits actually reside in the extent being moved.) + */ + error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record); + if (error) { + printf("hfs_reclaim_file: BTUpdateRecord returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; + } + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; + } + if (hfs_resize_debug) { + printf ("hfs_reclaim_file: Relocated overflow#%d %u:(%u,%u) to %u:(%u,%u)\n", overflow_count, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); + } + } + } + /* Look for more records. */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + if (error == btNotFound) { + error = 0; + break; + } + } + } + +fail: + if (iterator) { + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + } + + (void) hfs_systemfile_unlock(hfsmp, lockflags); + + if ((*blks_moved != 0) && (is_sysfile == false)) { + (void) hfs_update(vp, MNT_WAIT); + } + + (void) hfs_end_transaction(hfsmp); + + if (took_truncate_lock) { + hfs_unlock_truncate(cp, TRUE); + } + + if (hfs_resize_debug) { + printf("hfs_reclaim_file: Finished relocating %sfork for fileid=%u (error=%d)\n", (forktype ? "rsrc" : "data"), fileID, error); + } + + return error; +} + + +/* + * This journal_relocate callback updates the journal info block to point + * at the new journal location. This write must NOT be done using the + * transaction. We must write the block immediately. We must also force + * it to get to the media so that the new journal location will be seen by + * the replay code before we can safely let journaled blocks be written + * to their normal locations. + * + * The tests for journal_uses_fua below are mildly hacky. 
Since the journal + * and the file system are both on the same device, I'm leveraging what + * the journal has decided about FUA. + */ +struct hfs_journal_relocate_args { + struct hfsmount *hfsmp; + vfs_context_t context; + u_int32_t newStartBlock; +}; + +static errno_t +hfs_journal_relocate_callback(void *_args) +{ + int error; + struct hfs_journal_relocate_args *args = _args; + struct hfsmount *hfsmp = args->hfsmp; + buf_t bp; + JournalInfoBlock *jibp; + + error = buf_meta_bread(hfsmp->hfs_devvp, + hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, vfs_context_ucred(args->context), &bp); + if (error) { + printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error); + return error; + } + jibp = (JournalInfoBlock*) buf_dataptr(bp); + jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize); + jibp->size = SWAP_BE64(hfsmp->jnl_size); + if (journal_uses_fua(hfsmp->jnl)) + buf_markfua(bp); + error = buf_bwrite(bp); + if (error) { + printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error); + return error; + } + if (!journal_uses_fua(hfsmp->jnl)) { + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context); + if (error) { + printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); + error = 0; /* Don't fail the operation. 
*/ + } + } + + return error; +} + + +static int +hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) +{ + int error; + int lockflags; + u_int32_t oldStartBlock; + u_int32_t newStartBlock; + u_int32_t oldBlockCount; + u_int32_t newBlockCount; + struct cat_desc journal_desc; + struct cat_attr journal_attr; + struct cat_fork journal_fork; + struct hfs_journal_relocate_args callback_args; + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error); + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize; + + /* TODO: Allow the journal to change size based on the new volume size. */ + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + &newStartBlock, &newBlockCount); + if (error) { + printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error); + goto fail; + } + if (newBlockCount != oldBlockCount) { + printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount); + goto free_fail; + } + + error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error); + goto free_fail; + } + + /* Update the catalog record for .journal */ + error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork); + if (error) { + printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error); + goto free_fail; + } + oldStartBlock = journal_fork.cf_extents[0].startBlock; + journal_fork.cf_size = newBlockCount * hfsmp->blockSize; + journal_fork.cf_extents[0].startBlock = newStartBlock; + journal_fork.cf_extents[0].blockCount = newBlockCount; + journal_fork.cf_blocks = newBlockCount; + error = 
cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL); + cat_releasedesc(&journal_desc); /* all done with cat descriptor */ + if (error) { + printf("hfs_reclaim_journal_file: cat_update returned %d\n", error); + goto free_fail; + } + callback_args.hfsmp = hfsmp; + callback_args.context = context; + callback_args.newStartBlock = newStartBlock; + + error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize, + (off_t)newBlockCount*hfsmp->blockSize, 0, + hfs_journal_relocate_callback, &callback_args); + if (error) { + /* NOTE: journal_relocate will mark the journal invalid. */ + printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error); + goto fail; + } + hfsmp->jnl_start = newStartBlock; + hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize; + + hfs_systemfile_unlock(hfsmp, lockflags); + error = hfs_end_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error); + } + + if (!error && hfs_resize_debug) { + printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + } + return error; + +free_fail: + (void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); +fail: + hfs_systemfile_unlock(hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error); + } + return error; +} + + +/* + * Move the journal info block to a new location. We have to make sure the + * new copy of the journal info block gets to the media first, then change + * the field in the volume header and the catalog record. 
+ */ +static int +hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) +{ + int error; + int lockflags; + u_int32_t oldBlock; + u_int32_t newBlock; + u_int32_t blockCount; + struct cat_desc jib_desc; + struct cat_attr jib_attr; + struct cat_fork jib_fork; + buf_t old_bp, new_bp; + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error); + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + error = BlockAllocate(hfsmp, 1, 1, 1, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + &newBlock, &blockCount); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); + goto fail; + } + if (blockCount != 1) { + printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); + goto free_fail; + } + error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); + goto free_fail; + } + + /* Copy the old journal info block content to the new location */ + error = buf_meta_bread(hfsmp->hfs_devvp, + hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, vfs_context_ucred(context), &old_bp); + if (error) { + printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error); + goto free_fail; + } + new_bp = buf_getblk(hfsmp->hfs_devvp, + newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, 0, 0, BLK_META); + bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize); + buf_brelse(old_bp); + if (journal_uses_fua(hfsmp->jnl)) + buf_markfua(new_bp); + error = buf_bwrite(new_bp); + if (error) { + printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error); + goto free_fail; + } + if (!journal_uses_fua(hfsmp->jnl)) { + 
error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + if (error) { + printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error); + /* Don't fail the operation. */ + } + } + + /* Update the catalog record for .journal_info_block */ + error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork); + if (error) { + printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error); + goto fail; + } + oldBlock = jib_fork.cf_extents[0].startBlock; + jib_fork.cf_size = hfsmp->blockSize; + jib_fork.cf_extents[0].startBlock = newBlock; + jib_fork.cf_extents[0].blockCount = 1; + jib_fork.cf_blocks = 1; + error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL); + cat_releasedesc(&jib_desc); /* all done with cat descriptor */ + if (error) { + printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error); + goto fail; + } + + /* Update the pointer to the journal info block in the volume header. 
*/ + hfsmp->vcbJinfoBlock = newBlock; + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error); + goto fail; + } + hfs_systemfile_unlock(hfsmp, lockflags); + error = hfs_end_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); + } + error = hfs_journal_flush(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); + } + + if (!error && hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount); + } + return error; + +free_fail: + (void) BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); +fail: + hfs_systemfile_unlock(hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error); + } + return error; +} + + /* * Reclaim space at the end of a file system. + * + * Inputs - + * startblk - start block of the space being reclaimed + * reclaimblks - number of allocation blocks to reclaim */ static int -hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) +hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context) { struct vnode *vp = NULL; FCB *fcb; @@ -3171,32 +4852,90 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) u_int32_t saved_next_allocation; cnid_t * cnidbufp; size_t cnidbufsize; - int filecnt; + int filecnt = 0; int maxfilecnt; - u_long block; + u_int32_t block; int lockflags; - int i; + int i, j; int error; + int lastprogress = 0; + u_int32_t blks_moved = 0; + u_int32_t total_blks_moved = 0; + Boolean need_relocate; - /* - * Check if Attributes file overlaps. + /* Relocate extents of the Allocation file if they're in the way. 
*/ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &blks_moved, context); + if (error) { + printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error); + return error; + } + total_blks_moved += blks_moved; + + /* Relocate extents of the Extents B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &blks_moved, context); + if (error) { + printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error); + return error; + } + total_blks_moved += blks_moved; + + /* Relocate extents of the Catalog B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &blks_moved, context); + if (error) { + printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error); + return error; + } + total_blks_moved += blks_moved; + + /* Relocate extents of the Attributes B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &blks_moved, context); + if (error) { + printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error); + return error; + } + total_blks_moved += blks_moved; + + /* Relocate extents of the Startup File if there is one and they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &blks_moved, context); + if (error) { + printf("hfs_reclaimspace: reclaim startup file returned %d\n", error); + return error; + } + total_blks_moved += blks_moved; + + /* + * We need to make sure the alternate volume header gets flushed if we moved + * any extents in the volume header. But we need to do that before + * shrinking the size of the volume, or else the journal code will panic + * with an invalid (too large) block number. + * + * Note that total_blks_moved will be set if ANY extent was moved, even + * if it was just an overflow extent. 
In this case, the journal_flush isn't + * strictly required, but shouldn't hurt. */ - if (hfsmp->hfs_attribute_vp) { - struct filefork *fp; + if (total_blks_moved) { + hfs_journal_flush(hfsmp); + } + + if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) { + error = hfs_reclaim_journal_file(hfsmp, context); + if (error) { + printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error); + return error; + } + } - fp = VTOF(hfsmp->hfs_attribute_vp); - for (i = 0; i < kHFSPlusExtentDensity; ++i) { - block = fp->ff_extents[i].startBlock + - fp->ff_extents[i].blockCount; - if (block >= startblk) { - printf("hfs_reclaimspace: Attributes file can't move\n"); - return (EPERM); - } + if (hfsmp->vcbJinfoBlock >= startblk) { + error = hfs_reclaim_journal_info_block(hfsmp, context); + if (error) { + printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error); + return error; } } - - /* For now we'll move a maximum of 16,384 files. */ - maxfilecnt = MIN(hfsmp->hfs_filecount, 16384); + + /* For now move a maximum of 250,000 files. */ + maxfilecnt = MIN(hfsmp->hfs_filecount, 250000); + maxfilecnt = MIN((u_int32_t)maxfilecnt, reclaimblks); cnidbufsize = maxfilecnt * sizeof(cnid_t); if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) { return (ENOMEM); @@ -3207,7 +4946,8 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) } saved_next_allocation = hfsmp->nextAllocation; - hfsmp->nextAllocation = hfsmp->hfs_metazone_start; + /* Always try allocating new blocks after the metadata zone */ + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start); fcb = VTOF(hfsmp->hfs_catalog_vp); bzero(iterator, sizeof(*iterator)); @@ -3216,132 +4956,328 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) btdata.itemSize = sizeof(filerec); btdata.itemCount = 1; - /* Keep the Catalog file locked during iteration. 
*/ - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + /* Keep the Catalog and extents files locked during iteration. */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, kBTreeFirstRecord, iterator, NULL, NULL); if (error) { - hfs_systemfile_unlock(hfsmp, lockflags); - goto out; + goto end_iteration; } - /* * Iterate over all the catalog records looking for files - * that overlap into the space we're trying to free up. + * that overlap into the space we're trying to free up and + * the total number of blocks that will require relocation. */ for (filecnt = 0; filecnt < maxfilecnt; ) { error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); if (error) { - if (error == btNotFound) - error = 0; + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } break; } - if (filerec.recordType != kHFSPlusFileRecord || - filerec.fileID == hfsmp->hfs_jnlfileid) + if (filerec.recordType != kHFSPlusFileRecord) { continue; - /* - * Check if either fork overlaps target space. 
- */ + } + + need_relocate = false; + /* Check if data fork overlaps the target space */ for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (filerec.dataFork.extents[i].blockCount == 0) { + break; + } block = filerec.dataFork.extents[i].startBlock + - filerec.dataFork.extents[i].blockCount; + filerec.dataFork.extents[i].blockCount; if (block >= startblk) { - if (filerec.fileID == hfsmp->hfs_jnlfileid) { + if ((filerec.fileID == hfsmp->hfs_jnlfileid) || + (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) { printf("hfs_reclaimspace: cannot move active journal\n"); error = EPERM; - break; + goto end_iteration; } - cnidbufp[filecnt++] = filerec.fileID; + need_relocate = true; + goto save_fileid; + } + } + + /* Check if resource fork overlaps the target space */ + for (j = 0; j < kHFSPlusExtentDensity; ++j) { + if (filerec.resourceFork.extents[j].blockCount == 0) { break; } - block = filerec.resourceFork.extents[i].startBlock + - filerec.resourceFork.extents[i].blockCount; + block = filerec.resourceFork.extents[j].startBlock + + filerec.resourceFork.extents[j].blockCount; if (block >= startblk) { - cnidbufp[filecnt++] = filerec.fileID; - break; + need_relocate = true; + goto save_fileid; + } + } + + /* Check if any forks' overflow extents overlap the target space */ + if ((i == kHFSPlusExtentDensity) || (j == kHFSPlusExtentDensity)) { + if (hfs_overlapped_overflow_extents(hfsmp, startblk, filerec.fileID)) { + need_relocate = true; + goto save_fileid; + } + } + +save_fileid: + if (need_relocate == true) { + cnidbufp[filecnt++] = filerec.fileID; + if (hfs_resize_debug) { + printf ("hfs_reclaimspace: Will relocate extents for fileID=%u\n", filerec.fileID); } } } + +end_iteration: + /* If no regular file was found to be relocated and + * no system file was moved, we probably do not have + * enough space to relocate the system files, or + * something else went wrong. 
+ */ + if ((filecnt == 0) && (total_blks_moved == 0)) { + printf("hfs_reclaimspace: no files moved\n"); + error = ENOSPC; + } /* All done with catalog. */ hfs_systemfile_unlock(hfsmp, lockflags); - if (error) + if (error || filecnt == 0) goto out; + hfsmp->hfs_resize_filesmoved = 0; + hfsmp->hfs_resize_totalfiles = filecnt; + /* Now move any files that are in the way. */ for (i = 0; i < filecnt; ++i) { - struct vnode * rvp; + struct vnode *rvp; + struct cnode *cp; + struct filefork *datafork; if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0) continue; + + cp = VTOC(vp); + datafork = VTOF(vp); - /* Relocate any data fork blocks. */ - if (VTOF(vp)->ff_blocks > 0) { - error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc()); + /* Relocating directory hard links is not supported, so we punt (see radar 6217026). */ + if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) { + printf("hfs_reclaimspace: Unable to relocate directory hard link id=%d\n", cp->c_cnid); + error = EINVAL; + goto out; + } + + /* Relocate any overlapping data fork blocks. */ + if (datafork && datafork->ff_blocks > 0) { + error = hfs_reclaim_file(hfsmp, vp, startblk, 0, &blks_moved, context); + if (error) { + printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error); + break; + } + total_blks_moved += blks_moved; } - hfs_unlock(VTOC(vp)); - if (error) - break; - /* Relocate any resource fork blocks. */ - if ((VTOC((vp))->c_blocks - VTOF((vp))->ff_blocks) > 0) { - error = hfs_vgetrsrc(hfsmp, vp, &rvp, current_proc()); - if (error) + /* Relocate any overlapping resource fork blocks. */ + if ((cp->c_blocks - (datafork ? 
datafork->ff_blocks : 0)) > 0) { + error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE); + if (error) { + printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", cnidbufp[i], error); break; - hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK); - error = hfs_relocate(rvp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc()); - hfs_unlock(VTOC(rvp)); - vnode_put(rvp); - if (error) + } + error = hfs_reclaim_file(hfsmp, rvp, startblk, 0, &blks_moved, context); + VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT; + if (error) { + printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error); break; + } + total_blks_moved += blks_moved; } + hfs_unlock(cp); vnode_put(vp); vp = NULL; + + ++hfsmp->hfs_resize_filesmoved; + + /* Report intermediate progress. */ + if (filecnt > 100) { + int progress; + + progress = (i * 100) / filecnt; + if (progress > (lastprogress + 9)) { + printf("hfs_reclaimspace: %d%% done...\n", progress); + lastprogress = progress; + } + } } if (vp) { + hfs_unlock(VTOC(vp)); vnode_put(vp); vp = NULL; } + if (hfsmp->hfs_resize_filesmoved != 0) { + printf("hfs_reclaimspace: relocated %u blocks from %d files on \"%s\"\n", + total_blks_moved, (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN); + } +out: + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize); /* - * Note: this implementation doesn't handle overflow extents. + * Restore the roving allocation pointer on errors. + * (but only if we didn't move any files) + */ + if (error && hfsmp->hfs_resize_filesmoved == 0) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, saved_next_allocation); + } + return (error); +} + + +/* + * Check if there are any overflow data or resource fork extents that overlap + * into the disk space that is being reclaimed. 
+ * + * Output - + * 1 - One of the overflow extents need to be relocated + * 0 - No overflow extents need to be relocated, or there was an error + */ +static int +hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID) +{ + struct BTreeIterator * iterator = NULL; + struct FSBufferDescriptor btdata; + HFSPlusExtentRecord extrec; + HFSPlusExtentKey *extkeyptr; + FCB *fcb; + int overlapped = 0; + int i; + int error; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + return 0; + } + bzero(iterator, sizeof(*iterator)); + extkeyptr = (HFSPlusExtentKey *)&iterator->key; + extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; + extkeyptr->forkType = 0; + extkeyptr->fileID = fileID; + extkeyptr->startBlock = 0; + + btdata.bufferAddress = &extrec; + btdata.itemSize = sizeof(extrec); + btdata.itemCount = 1; + + fcb = VTOF(hfsmp->hfs_extents_vp); + + /* This will position the iterator just before the first overflow + * extent record for given fileID. It will always return btNotFound, + * so we special case the error code. */ + error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + if (error && (error != btNotFound)) { + goto out; + } + + /* BTIterateRecord() might return error if the btree is empty, and + * therefore we return that the extent does not overflow to the caller + */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + while (error == 0) { + /* Stop when we encounter a different file. */ + if (extkeyptr->fileID != fileID) { + break; + } + /* Check if any of the forks exist in the target space. */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (extrec[i].blockCount == 0) { + break; + } + if ((extrec[i].startBlock + extrec[i].blockCount) >= startblk) { + overlapped = 1; + goto out; + } + } + /* Look for more records. 
*/ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + } + out: kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize); + return overlapped; +} - /* On errors restore the roving allocation pointer. */ - if (error) { - hfsmp->nextAllocation = saved_next_allocation; + +/* + * Calculate the progress of a file system resize operation. + */ +__private_extern__ +int +hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) +{ + if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) { + return (ENXIO); } - return (error); + + if (hfsmp->hfs_resize_totalfiles > 0) + *progress = (hfsmp->hfs_resize_filesmoved * 100) / hfsmp->hfs_resize_totalfiles; + else + *progress = 0; + + return (0); } +/* + * Creates a UUID from a unique "name" in the HFS UUID Name space. + * See version 3 UUID. + */ +static void +hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result) +{ + MD5_CTX md5c; + uint8_t rawUUID[8]; + + ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; + ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; + + MD5Init( &md5c ); + MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); + MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); + MD5Final( result, &md5c ); + + result[6] = 0x30 | ( result[6] & 0x0F ); + result[8] = 0x80 | ( result[8] & 0x3F ); +} + /* * Get file system attributes. 
*/ static int hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) { +#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST)) +#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST)) + ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); - u_long freeCNIDs; - - freeCNIDs = (u_long)0xFFFFFFFF - (u_long)hfsmp->vcbNxtCNID; - - VFSATTR_RETURN(fsap, f_objcount, (uint64_t)hfsmp->vcbFilCnt + (uint64_t)hfsmp->vcbDirCnt); - VFSATTR_RETURN(fsap, f_filecount, (uint64_t)hfsmp->vcbFilCnt); - VFSATTR_RETURN(fsap, f_dircount, (uint64_t)hfsmp->vcbDirCnt); - VFSATTR_RETURN(fsap, f_maxobjcount, (uint64_t)0xFFFFFFFF); - VFSATTR_RETURN(fsap, f_iosize, (size_t)(MAX_UPL_TRANSFER * PAGE_SIZE)); - VFSATTR_RETURN(fsap, f_blocks, (uint64_t)hfsmp->totalBlocks); - VFSATTR_RETURN(fsap, f_bfree, (uint64_t)hfs_freeblks(hfsmp, 0)); - VFSATTR_RETURN(fsap, f_bavail, (uint64_t)hfs_freeblks(hfsmp, 1)); - VFSATTR_RETURN(fsap, f_bsize, (uint32_t)vcb->blockSize); + u_int32_t freeCNIDs; + + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; + + VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); + VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF); + VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0)); + VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks); + VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0)); + VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1)); + VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize); /* XXX needs clarification */ VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1)); /* Maximum files is constrained by total blocks. 
*/ - VFSATTR_RETURN(fsap, f_files, (uint64_t)(hfsmp->totalBlocks - 2)); - VFSATTR_RETURN(fsap, f_ffree, MIN((uint64_t)freeCNIDs, (uint64_t)hfs_freeblks(hfsmp, 1))); + VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2)); + VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1))); fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev; fsap->f_fsid.val[1] = vfs_typenum(mp); @@ -3359,18 +5295,28 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t cap->capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS; + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_HIDDEN_FILES | + VOL_CAP_FMT_PATH_FROM_ID; } else { cap->capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_SYMBOLICLINKS | VOL_CAP_FMT_HARDLINKS | VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_ZERO_RUNS | (hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) | (hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? 
VOL_CAP_FMT_CASE_SENSITIVE : 0) | VOL_CAP_FMT_CASE_PRESERVING | VOL_CAP_FMT_FAST_STATFS | - VOL_CAP_FMT_2TB_FILESIZE; + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION; +#else + VOL_CAP_FMT_PATH_FROM_ID; +#endif } cap->capabilities[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | @@ -3381,7 +5327,13 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK; + VOL_CAP_INT_FLOCK | +#if NAMEDSTREAMS + VOL_CAP_INT_EXTENDED_ATTR | + VOL_CAP_INT_NAMEDSTREAMS; +#else + VOL_CAP_INT_EXTENDED_ATTR; +#endif cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0; cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0; @@ -3397,7 +5349,15 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_FMT_CASE_SENSITIVE | VOL_CAP_FMT_CASE_PRESERVING | VOL_CAP_FMT_FAST_STATFS | - VOL_CAP_FMT_2TB_FILESIZE; + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_OPENDENYMODES | + VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION; +#else + VOL_CAP_FMT_PATH_FROM_ID; +#endif cap->valid[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | @@ -3408,7 +5368,14 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK; + VOL_CAP_INT_FLOCK | + VOL_CAP_INT_MANLOCK | +#if NAMEDSTREAMS + VOL_CAP_INT_EXTENDED_ATTR | + VOL_CAP_INT_NAMEDSTREAMS; +#else + VOL_CAP_INT_EXTENDED_ATTR; +#endif cap->valid[VOL_CAPABILITIES_RESERVED1] = 0; cap->valid[VOL_CAPABILITIES_RESERVED2] = 0; VFSATTR_SET_SUPPORTED(fsap, f_capabilities); @@ -3416,16 +5383,16 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { vol_attributes_attr_t 
*attrp = &fsap->f_attributes; - attrp->validattr.commonattr = ATTR_CMN_VALIDMASK; + attrp->validattr.commonattr = HFS_ATTR_CMN_VALIDMASK; attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; attrp->validattr.dirattr = ATTR_DIR_VALIDMASK; - attrp->validattr.fileattr = ATTR_FILE_VALIDMASK; + attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK; attrp->validattr.forkattr = 0; - attrp->nativeattr.commonattr = ATTR_CMN_VALIDMASK; + attrp->nativeattr.commonattr = HFS_ATTR_CMN_VALIDMASK; attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK; - attrp->nativeattr.fileattr = ATTR_FILE_VALIDMASK; + attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK; attrp->nativeattr.forkattr = 0; VFSATTR_SET_SUPPORTED(fsap, f_attributes); } @@ -3440,7 +5407,7 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t fsap->f_backup_time.tv_nsec = 0; VFSATTR_SET_SUPPORTED(fsap, f_backup_time); if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) { - uint16_t subtype = 0; + u_int16_t subtype = 0; /* * Subtypes (flavors) for HFS @@ -3465,10 +5432,13 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t } if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { - strncpy(fsap->f_vol_name, hfsmp->vcbVN, MAXPATHLEN); - fsap->f_vol_name[MAXPATHLEN - 1] = 0; + strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN); VFSATTR_SET_SUPPORTED(fsap, f_vol_name); } + if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) { + hfs_getvoluuid(hfsmp, fsap->f_uuid); + VFSATTR_SET_SUPPORTED(fsap, f_uuid); + } return (0); } @@ -3503,7 +5473,7 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) todir_desc.cd_cnid = kHFSRootFolderID; todir_desc.cd_flags = CD_ISDIR; - to_desc.cd_nameptr = name; + to_desc.cd_nameptr = (const u_int8_t *)name; to_desc.cd_namelen = strlen(name); to_desc.cd_parentcnid = kHFSRootParentID; to_desc.cd_cnid = cp->c_cnid; @@ -3520,15 +5490,14 @@ hfs_rename_volume(struct vnode *vp, const char 
*name, proc_t p) * If successful, update the name in the VCB, ensure it's terminated. */ if (!error) { - strncpy(vcb->vcbVN, name, sizeof(vcb->vcbVN)); - vcb->vcbVN[sizeof(vcb->vcbVN) - 1] = 0; + strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN)); } hfs_systemfile_unlock(hfsmp, lockflags); cat_postflight(hfsmp, &cookie, p); if (error) - vcb->vcbFlags |= 0xFF00; + MarkVCBDirty(vcb); (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); } hfs_end_transaction(hfsmp); @@ -3536,12 +5505,12 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) if (!error) { /* Release old allocated name buffer */ if (cp->c_desc.cd_flags & CD_HASBUF) { - char *name = cp->c_desc.cd_nameptr; + const char *tmp_name = (const char *)cp->c_desc.cd_nameptr; cp->c_desc.cd_nameptr = 0; cp->c_desc.cd_namelen = 0; cp->c_desc.cd_flags &= ~CD_HASBUF; - vfs_removename(name); + vfs_removename(tmp_name); } /* Update cnode's catalog descriptor */ replace_desc(cp, &new_desc); @@ -3589,6 +5558,73 @@ out: return error; } +/* If a runtime corruption is detected, set the volume inconsistent + * bit in the volume attributes. The volume inconsistent bit is a persistent + * bit which represents that the volume is corrupt and needs repair. + * The volume inconsistent bit can be set from the kernel when it detects + * runtime corruption or from file system repair utilities like fsck_hfs when + * a repair operation fails. The bit should be cleared only from file system + * verify/repair utility like fsck_hfs when a verify/repair succeeds. 
+ */ +void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) +{ + HFS_MOUNT_LOCK(hfsmp, TRUE); + if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) { + hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; + MarkVCBDirty(hfsmp); + } + if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) { + /* Log information to ASL log */ + fslog_fs_corrupt(hfsmp->hfs_mp); + printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); + } + HFS_MOUNT_UNLOCK(hfsmp, TRUE); +} + +/* Replay the journal on the device node provided. Returns zero if + * journal replay succeeded or no journal was supposed to be replayed. + */ +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context) +{ + int retval = 0; + struct mount *mp = NULL; + struct hfs_mount_args *args = NULL; + + /* Replay allowed only on raw devices */ + if (!vnode_ischr(devvp)) { + retval = EINVAL; + goto out; + } + + /* Create dummy mount structures */ + MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK); + if (mp == NULL) { + retval = ENOMEM; + goto out; + } + bzero(mp, sizeof(struct mount)); + mount_lock_init(mp); + + MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK); + if (args == NULL) { + retval = ENOMEM; + goto out; + } + bzero(args, sizeof(struct hfs_mount_args)); + + retval = hfs_mountfs(devvp, mp, args, 1, context); + buf_flushdirtyblks(devvp, MNT_WAIT, 0, "hfs_journal_replay"); + +out: + if (mp) { + mount_lock_destroy(mp); + FREE(mp, M_TEMP); + } + if (args) { + FREE(args, M_TEMP); + } + return retval; +} /* * hfs vfs operations. @@ -3606,5 +5642,6 @@ struct vfsops hfs_vfsops = { hfs_vptofh, hfs_init, hfs_sysctl, - hfs_vfs_setattr + hfs_vfs_setattr, + {NULL} };