X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c910b4d9d2451126ae3917b931cd4390c11e1d52..0b4c1975fb5e4eccf1012a35081f7e7799b81046:/bsd/hfs/hfs_vfsops.c diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index b2e71a034..aaac6d0df 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Apple Inc. All rights reserved. + * Copyright (c) 1999-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,6 +95,9 @@ #include #include +#include +#include + #include "hfs.h" #include "hfs_catalog.h" #include "hfs_cnode.h" @@ -111,6 +114,8 @@ int hfs_dbg_all = 0; int hfs_dbg_err = 0; #endif +/* Enable/disable debugging code for live volume resizing */ +int hfs_resize_debug = 0; lck_grp_attr_t * hfs_group_attr; lck_attr_t * hfs_lock_attr; @@ -118,9 +123,10 @@ lck_grp_t * hfs_mutex_group; lck_grp_t * hfs_rwlock_group; extern struct vnodeopv_desc hfs_vnodeop_opv_desc; -/* not static so we can re-use in hfs_readwrite.c for build_path */ -int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); +extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; +/* not static so we can re-use in hfs_readwrite.c for build_path calls */ +int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); @@ -141,10 +147,9 @@ static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlen static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); -static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vfs_context_t context); -static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, - u_int32_t catblks, u_int32_t fileID, int rsrcfork); -static int hfs_journal_replay(const char *devnode, vfs_context_t context); +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context); +static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID); +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context); /* @@ -160,8 +165,6 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) struct vfsstatfs *vfsp; int error; - hfs_chashinit_finish(); - if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) return (error); @@ -333,7 +336,12 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte } - /* Only clear HFS_READ_ONLY after a successfull write */ + /* See if we need to erase unused Catalog nodes due to . */ + retval = hfs_erase_unused_nodes(hfsmp); + if (retval != E_NONE) + goto out; + + /* Only clear HFS_READ_ONLY after a successful write */ hfsmp->hfs_flags &= ~HFS_READ_ONLY; /* If this mount point was downgraded from read-write @@ -360,7 +368,8 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* * Allow hot file clustering if conditions allow. */ - if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && + ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) { (void) hfs_recording_init(hfsmp); } /* Force ACLs on HFS+ file systems. */ @@ -378,8 +387,6 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* Set the mount flag to indicate that we support volfs */ vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS)); - hfs_chashinit_finish(); - retval = hfs_mountfs(devvp, mp, &args, 0, context); } out: @@ -462,7 +469,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) ExtendedVCB *vcb; hfs_to_unicode_func_t get_unicode_func; unicode_to_hfs_func_t get_hfsname_func; - u_long old_encoding = 0; + u_int32_t old_encoding = 0; struct hfs_changefs_cargs cargs; u_int32_t mount_flags; @@ -527,7 +534,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) /* Change the hfs encoding value (hfs only) */ if ((vcb->vcbSigWord == kHFSSigWord) && - (args->hfs_encoding != (u_long)VNOVAL) && + (args->hfs_encoding != (u_int32_t)VNOVAL) && (hfsmp->hfs_encoding != args->hfs_encoding)) { retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func); @@ -828,6 +835,128 @@ hfs_reload(struct mount *mountp) } + +static void +hfs_syncer(void *arg0, void *unused) +{ +#pragma unused(unused) + + struct hfsmount *hfsmp = arg0; + clock_sec_t secs; + clock_usec_t usecs; + uint32_t delay = HFS_META_DELAY; + uint64_t now; + static int no_max=1; + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + + // + // If the amount of pending writes is more than our limit, wait + // for 2/3 of it to drain and then flush the journal. + // + if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) { + int counter=0; + uint64_t pending_io, start, rate; + + no_max = 0; + + hfs_start_transaction(hfsmp); // so we hold off any new i/o's + + pending_io = hfsmp->hfs_mp->mnt_pending_write_size; + + clock_get_calendar_microtime(&secs, &usecs); + start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + + while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) { + tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10); + } + + if (counter >= 500) { + printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size); + } + + if (hfsmp->jnl) { + journal_flush(hfsmp->jnl); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); + } + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; + rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second + + hfs_end_transaction(hfsmp); + + // + // If a reasonable amount of time elapsed then check the + // i/o rate. If it's taking less than 1 second or more + // than 2 seconds, adjust hfs_max_pending_io so that we + // will allow about 1.5 seconds of i/o to queue up. + // + if ((now - start) >= 300000) { + uint64_t scale = (pending_io * 100) / rate; + + if (scale < 100 || scale > 200) { + // set it so that it should take about 1.5 seconds to drain + hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL; + } + } + + } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL) + || (((now - hfsmp->hfs_last_sync_time) >= 100000LL) + && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL) + && (hfsmp->hfs_active_threads == 0) + && (hfsmp->hfs_global_lock_nesting == 0))) { + + // + // Flush the journal if more than 5 seconds elapsed since + // the last sync OR we have not sync'ed recently and the + // last sync request time was more than 100 milliseconds + // ago and no one is in the middle of a transaction right + // now. Else we defer the sync and reschedule it. + // + if (hfsmp->jnl) { + lck_rw_lock_shared(&hfsmp->hfs_global_lock); + + journal_flush(hfsmp->jnl); + + lck_rw_unlock_shared(&hfsmp->hfs_global_lock); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); + } + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; + + } else if (hfsmp->hfs_active_threads == 0) { + uint64_t deadline; + + clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline); + thread_call_enter_delayed(hfsmp->hfs_syncer, deadline); + + // note: we intentionally return early here and do not + // decrement the sync_scheduled and sync_incomplete + // variables because we rescheduled the timer. + + return; + } + + // + // NOTE: we decrement these *after* we're done the journal_flush() since + // it can take a significant amount of time and so we don't want more + // callbacks scheduled until we're done this one. + // + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); + wakeup((caddr_t)&hfsmp->hfs_sync_incomplete); +} + + +extern int IOBSDIsMediaEjectable( const char *cdev_name ); + /* * Common code for mount and mountroot */ @@ -837,10 +966,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, { struct proc *p = vfs_context_proc(context); int retval = E_NONE; - struct hfsmount *hfsmp; + struct hfsmount *hfsmp = NULL; struct buf *bp; dev_t dev; - HFSMasterDirectoryBlock *mdbp; + HFSMasterDirectoryBlock *mdbp = NULL; int ronly; #if QUOTA int i; @@ -855,6 +984,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, u_int32_t iswritable; daddr64_t mdb_offset; int isvirtual = 0; + int isroot = 0; + + if (args == NULL) { + /* only hfs_mountroot passes us NULL as the 'args' argument */ + isroot = 1; + } ronly = vfs_isrdonly(mp); dev = vnode_specrdev(devvp); @@ -874,6 +1009,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, retval = ENXIO; goto error_exit; } + if (log_blksize == 0 || log_blksize > 1024*1024*1024) { + printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize); + retval = ENXIO; + goto error_exit; + } + /* Get the physical block size. */ retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context); if (retval) { @@ -886,6 +1027,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, */ phys_blksize = log_blksize; } + if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) { + printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize); + retval = ENXIO; + goto error_exit; + } + /* Switch to 512 byte sectors (temporarily) */ if (log_blksize > 512) { u_int32_t size512 = 512; @@ -923,6 +1070,15 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, phys_blksize = log_blksize; } + /* + * The cluster layer is not currently prepared to deal with a logical + * block size larger than the system's page size. (It can handle + * blocks per page, but not multiple pages per block.) So limit the + * logical block size to the page size. + */ + if (log_blksize > PAGE_SIZE) + log_blksize = PAGE_SIZE; + /* Now switch to our preferred physical block size. */ if (log_blksize > 512) { if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { @@ -949,13 +1105,23 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, goto error_exit; } MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK); + if (mdbp == NULL) { + retval = ENOMEM; + goto error_exit; + } bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize); buf_brelse(bp); bp = NULL; MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK); + if (hfsmp == NULL) { + retval = ENOMEM; + goto error_exit; + } bzero(hfsmp, sizeof(struct hfsmount)); + hfs_chashinit_finish(hfsmp); + /* * Init the volume information structure */ @@ -1032,7 +1198,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) { - /* If only journal replay is requested, exit immediately */ + /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */ + if (vfs_isrdwr(mp)) { + retval = EROFS; + goto error_exit; + } + /* Treat it as if it's read-only and not writeable */ + hfsmp->hfs_flags |= HFS_READ_ONLY; + hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; + + /* If only journal replay is requested, exit immediately */ if (journal_replay_only) { retval = 0; goto error_exit; @@ -1093,7 +1268,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, * boundary. */ if ((embeddedOffset % log_blksize) != 0) { - printf("HFS Mount: embedded volume offset not" + printf("hfs_mountfs: embedded volume offset not" " a multiple of physical block size (%d);" " switching to 512\n", log_blksize); log_blksize = 512; @@ -1110,9 +1285,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* Note: relative block count adjustment */ hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize; - hfsmp->hfs_logical_block_size = log_blksize; - /* Update logical/physical block size */ + /* Update logical /physical block size */ + hfsmp->hfs_logical_block_size = log_blksize; hfsmp->hfs_physical_block_size = log_blksize; phys_blksize = log_blksize; hfsmp->hfs_log_per_phys = 1; @@ -1140,11 +1315,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * On inconsistent disks, do not allow read-write mount - * unless it is the boot volume being mounted. + * unless it is the boot volume being mounted. We also + * always want to replay the journal if the journal_replay_only + * flag is set because that will (most likely) get the + * disk into a consistent state before fsck_hfs starts + * looking at it. */ - if (!(vfs_flags(mp) & MNT_ROOTFS) && - (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) && - !(hfsmp->hfs_flags & HFS_READ_ONLY)) { + if ( !(vfs_flags(mp) & MNT_ROOTFS) + && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) + && !journal_replay_only + && !(hfsmp->hfs_flags & HFS_READ_ONLY)) { retval = EINVAL; goto error_exit; } @@ -1175,9 +1355,17 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, // if we're able to init the journal, mark the mount // point as journaled. // - if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { + if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) { vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); } else { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + retval = EINVAL; + goto error_exit; + } + // if the journal failed to open, then set the lastMountedVersion // to be "FSK!" which fsck_hfs will see and force the fsck instead // of just bailing out because the volume is journaled. @@ -1241,7 +1429,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, * then retry with physical blocksize of 512. */ if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) { - printf("HFS Mount: could not use physical block size " + printf("hfs_mountfs: could not use physical block size " "(%d) switching to 512\n", log_blksize); log_blksize = 512; if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { @@ -1258,7 +1446,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfsmp->hfs_logical_block_size = log_blksize; hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize; - if (hfsmp->jnl) { + if (hfsmp->jnl && hfsmp->jvp == devvp) { // close and re-open this with the new block size journal_close(hfsmp->jnl); hfsmp->jnl = NULL; @@ -1327,7 +1515,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); vfs_setmaxsymlen(mp, 0); - mp->mnt_vtable->vfc_threadsafe = TRUE; + mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR; #if NAMEDSTREAMS mp->mnt_kern_flag |= MNTK_NAMED_STREAMS; @@ -1344,12 +1532,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * Set the free space warning levels for a non-root volume: * - * Set the lower freespace limit (the level that will trigger a warning) - * to 5% of the volume size or 250MB, whichever is less, and the desired - * level (which will cancel the alert request) to 1/2 above that limit. - * Start looking for free space to drop below this level and generate a - * warning immediately if needed: + * Set the "danger" limit to 1% of the volume size or 100MB, whichever + * is less. Set the "warning" limit to 2% of the volume size or 150MB, + * whichever is less. And last, set the "desired" freespace level to + * to 3% of the volume size or 200MB, whichever is less. */ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION); hfsmp->hfs_freespace_notify_warninglimit = MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION); @@ -1360,10 +1550,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * Set the free space warning levels for the root volume: * - * Set the lower freespace limit (the level that will trigger a warning) - * to 1% of the volume size or 50MB, whichever is less, and the desired - * level (which will cancel the alert request) to 2% or 75MB, whichever is less. + * Set the "danger" limit to 5% of the volume size or 125MB, whichever + * is less. Set the "warning" limit to 10% of the volume size or 250MB, + * whichever is less. And last, set the "desired" freespace level to + * to 11% of the volume size or 375MB, whichever is less. */ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION); hfsmp->hfs_freespace_notify_warninglimit = MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION); @@ -1379,6 +1573,19 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } } + /* do not allow ejectability checks on the root device */ + if (isroot == 0) { + if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 && + IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) { + hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with. + hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp); + if (hfsmp->hfs_syncer == NULL) { + printf("hfs: failed to allocate syncer thread callback for %s (%s)\n", + mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); + } + } + } + /* * Start looking for free space to drop below this level and generate a * warning immediately if needed: @@ -1399,13 +1606,16 @@ error_exit: FREE(mdbp, M_TEMP); if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(hfsmp->jvp); + (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel()); hfsmp->jvp = NULL; } if (hfsmp) { if (hfsmp->hfs_devvp) { vnode_rele(hfsmp->hfs_devvp); } + hfs_delete_chash(hfsmp); + FREE(hfsmp, M_HFSMNT); vfs_setfsprivate(mp, NULL); } @@ -1451,6 +1661,38 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) if (hfsmp->hfs_flags & HFS_METADATA_ZONE) (void) hfs_recording_suspend(hfsmp); + /* + * Cancel any pending timers for this volume. Then wait for any timers + * which have fired, but whose callbacks have not yet completed. + */ + if (hfsmp->hfs_syncer) + { + struct timespec ts = {0, 100000000}; /* 0.1 seconds */ + + /* + * Cancel any timers that have been scheduled, but have not + * fired yet. NOTE: The kernel considers a timer complete as + * soon as it starts your callback, so the kernel does not + * keep track of the number of callbacks in progress. + */ + if (thread_call_cancel(hfsmp->hfs_syncer)) + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); + thread_call_free(hfsmp->hfs_syncer); + hfsmp->hfs_syncer = NULL; + + /* + * This waits for all of the callbacks that were entered before + * we did thread_call_cancel above, but have not completed yet. + */ + while(hfsmp->hfs_sync_incomplete > 0) + { + msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts); + } + + if (hfsmp->hfs_sync_incomplete < 0) + panic("hfs_unmount: pm_sync_incomplete underflow!\n"); + } + /* * Flush out the b-trees, volume bitmap and Volume Header */ @@ -1513,6 +1755,23 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; } + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + int i; + u_int32_t min_start = hfsmp->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) { + if (hfsmp->vcbFreeExt[i].startBlock < min_start) { + min_start = hfsmp->vcbFreeExt[i].startBlock; + } + } + if (min_start < hfsmp->nextAllocation) { + hfsmp->nextAllocation = min_start; + } + } + + retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); if (retval) { HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; @@ -1527,7 +1786,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } /* @@ -1552,9 +1811,10 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { + vnode_clearmountedon(hfsmp->jvp); retval = VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, - context); + vfs_context_kernel()); vnode_put(hfsmp->jvp); hfsmp->jvp = NULL; } @@ -1573,6 +1833,8 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) #endif /* HFS_SPARSE_DEV */ lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); vnode_rele(hfsmp->hfs_devvp); + + hfs_delete_chash(hfsmp); FREE(hfsmp, M_HFSMNT); return (0); @@ -1688,18 +1950,18 @@ hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_contex { ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); - u_long freeCNIDs; + u_int32_t freeCNIDs; u_int16_t subtype = 0; - freeCNIDs = (u_long)0xFFFFFFFF - (u_long)vcb->vcbNxtCNID; + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID; sbp->f_bsize = (u_int32_t)vcb->blockSize; sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0); - sbp->f_blocks = (u_int64_t)((unsigned long)vcb->totalBlocks); - sbp->f_bfree = (u_int64_t)((unsigned long )hfs_freeblks(hfsmp, 0)); - sbp->f_bavail = (u_int64_t)((unsigned long )hfs_freeblks(hfsmp, 1)); - sbp->f_files = (u_int64_t)((unsigned long )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ - sbp->f_ffree = (u_int64_t)((unsigned long )(MIN(freeCNIDs, sbp->f_bavail))); + sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks); + sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0)); + sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1)); + sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ + sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail))); /* * Subtypes (flavors) for HFS @@ -1928,7 +2190,17 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) } if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); + } + + { + clock_sec_t secs; + clock_usec_t usecs; + uint64_t now; + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; } lck_rw_unlock_shared(&hfsmp->hfs_insync); @@ -1965,23 +2237,20 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, result = ESTALE; return result; } - - /* The createtime can be changed by hfs_setattr or hfs_setattrlist. - * For NFS, we are assuming that only if the createtime was moved - * forward would it mean the fileID got reused in that session by - * wrapping. We don't have a volume ID or other unique identifier to - * to use here for a generation ID across reboots, crashes where - * metadata noting lastFileID didn't make it to disk but client has - * it, or volume erasures where fileIDs start over again. Lastly, - * with HFS allowing "wraps" of fileIDs now, this becomes more - * error prone. Future, would be change the "wrap bit" to a unique - * wrap number and use that for generation number. For now do this. - */ - if (((time_t)(ntohl(hfsfhp->hfsfid_gen)) < VTOC(nvp)->c_itime)) { - hfs_unlock(VTOC(nvp)); - vnode_put(nvp); - return (ESTALE); - } + + /* + * We used to use the create time as the gen id of the file handle, + * but it is not static enough because it can change at any point + * via system calls. We still don't have another volume ID or other + * unique identifier to use for a generation ID across reboots that + * persists until the file is removed. Using only the CNID exposes + * us to the potential wrap-around case, but as of 2/2008, it would take + * over 2 months to wrap around if the machine did nothing but allocate + * CNIDs. Using some kind of wrap counter would only be effective if + * each file had the wrap counter associated with it. For now, + * we use only the CNID to identify the file as it's good enough. + */ + *vpp = nvp; hfs_unlock(VTOC(nvp)); @@ -2007,8 +2276,9 @@ hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_conte cp = VTOC(vp); hfsfhp = (struct hfsfid *)fhp; + /* only the CNID is used to identify the file now */ hfsfhp->hfsfid_cnid = htonl(cp->c_fileid); - hfsfhp->hfsfid_gen = htonl(cp->c_itime); + hfsfhp->hfsfid_gen = htonl(cp->c_fileid); *fhlenp = sizeof(struct hfsfid); return (0); @@ -2037,6 +2307,9 @@ hfs_init(__unused struct vfsconf *vfsp) hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr); +#if HFS_COMPRESSION + decmpfs_init(); +#endif return (0); } @@ -2111,15 +2384,23 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, size_t bufsize; size_t bytes; u_int32_t hint; - u_int16_t *unicode_name; - char *filename; + u_int16_t *unicode_name = NULL; + char *filename = NULL; if ((newlen <= 0) || (newlen > MAXPATHLEN)) return (EINVAL); bufsize = MAX(newlen * 3, MAXPATHLEN); MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK); + if (filename == NULL) { + error = ENOMEM; + goto encodinghint_exit; + } MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK); + if (filename == NULL) { + error = ENOMEM; + goto encodinghint_exit; + } error = copyin(newp, (caddr_t)filename, newlen); if (error == 0) { @@ -2130,8 +2411,12 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint); } } - FREE(unicode_name, M_TEMP); - FREE(filename, M_TEMP); + +encodinghint_exit: + if (unicode_name) + FREE(unicode_name, M_TEMP); + if (filename) + FREE(filename, M_TEMP); return (error); } else if (name[0] == HFS_ENABLE_JOURNALING) { @@ -2188,6 +2473,14 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", (off_t)name[2], (off_t)name[3]); + // + // XXXdbg - note that currently (Sept, 08) hfs_util does not support + // enabling the journal on a separate device so it is safe + // to just copy hfs_devvp here. If hfs_util gets the ability + // to dynamically enable the journal on a separate device then + // we will have to do the same thing as hfs_early_journal_init() + // to locate and open the journal device. + // jvp = hfsmp->hfs_devvp; jnl = journal_create(jvp, (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize @@ -2202,7 +2495,8 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, if (jnl == NULL) { printf("hfs: FAILED to create the journal!\n"); if (jvp && jvp != hfsmp->hfs_devvp) { - VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(jvp); + VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); } jvp = NULL; @@ -2237,6 +2531,13 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } return 0; } else if (name[0] == HFS_DISABLE_JOURNALING) { // clear the journaling bit @@ -2269,7 +2570,9 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfsmp->jnl = NULL; if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(hfsmp->jvp); + VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); + vnode_put(hfsmp->jvp); } hfsmp->jvp = NULL; vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); @@ -2282,6 +2585,13 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } return 0; } else if (name[0] == HFS_GET_JOURNAL_INFO) { vnode_t vp = vfs_context_cwd(context); @@ -2290,6 +2600,10 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, if (vp == NULLVP) return EINVAL; + /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */ + if (proc_is64bit(current_proc())) + return EINVAL; + hfsmp = VTOHFS(vp); if (hfsmp->jnl == NULL) { jnl_start = 0; @@ -2309,31 +2623,20 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, return 0; } else if (name[0] == HFS_SET_PKG_EXTENSIONS) { - return set_package_extensions_table((void *)name[1], name[2], name[3]); + return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]); } else if (name[0] == VFS_CTL_QUERY) { struct sysctl_req *req; - struct vfsidctl vc; - struct user_vfsidctl user_vc; + union union_vfsidctl vc; struct mount *mp; struct vfsquery vq; - boolean_t is_64_bit; - is_64_bit = proc_is64bit(p); req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ - if (is_64_bit) { - error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); - if (error) return (error); - - mp = vfs_getvfs(&user_vc.vc_fsid); - } - else { - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) return (error); - - mp = vfs_getvfs(&vc.vc_fsid); - } + error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32)); + if (error) return (error); + + mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */ if (mp == NULL) return (ENOENT); hfsmp = VFSTOHFS(mp); @@ -2341,34 +2644,36 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, vq.vq_flags = hfsmp->hfs_notification_conditions; return SYSCTL_OUT(req, &vq, sizeof(vq));; } else if (name[0] == HFS_REPLAY_JOURNAL) { - char *devnode = NULL; - size_t devnode_len; - - devnode_len = *oldlenp; - MALLOC(devnode, char *, devnode_len + 1, M_TEMP, M_WAITOK); - if (devnode == NULL) { - return ENOMEM; + vnode_t devvp = NULL; + int device_fd; + if (namelen != 2) { + return (EINVAL); } - - error = copyin(oldp, (caddr_t)devnode, devnode_len); + device_fd = name[1]; + error = file_vnode(device_fd, &devvp); if (error) { - FREE(devnode, M_TEMP); return error; } - devnode[devnode_len] = 0; - - error = hfs_journal_replay(devnode, context); - FREE(devnode, M_TEMP); + error = vnode_getwithref(devvp); + if (error) { + file_drop(device_fd); + return error; + } + error = hfs_journal_replay(devvp, context); + file_drop(device_fd); + vnode_put(devvp); return error; } return (ENOTSUP); } -/* hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support the - * build_path ioctl. We use it to leverage the code below that updates the origin - * cache if necessary. +/* + * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support + * the build_path ioctl. We use it to leverage the code below that updates + * the origin list cache if necessary */ + int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { @@ -2384,10 +2689,10 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con /* * ADLs may need to have their origin state updated - * since build_path needs a valid parent. The same is true - * for hardlinked files as well. There isn't a race window here in re-acquiring - * the cnode lock since we aren't pulling any data out of the cnode; instead, we're - * going back to the catalog. + * since build_path needs a valid parent. The same is true + * for hardlinked files as well. There isn't a race window here + * in re-acquiring the cnode lock since we aren't pulling any data + * out of the cnode; instead, we're going to the catalog. */ if ((VTOC(*vpp)->c_flag & C_HARDLINK) && (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) { @@ -2396,13 +2701,11 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con if (!hfs_haslinkorigin(cp)) { lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); + error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { - if ((cdesc.cd_parentcnid != - hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && - (cdesc.cd_parentcnid != - hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { + if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && + (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { hfs_savelinkorigin(cp, cdesc.cd_parentcnid); } cat_releasedesc(&cdesc); @@ -2445,7 +2748,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) /* * Check the hash first */ - vp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, cnid, 0, skiplock); + vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock); if (vp) { *vpp = vp; return(0); @@ -2560,7 +2863,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp); - if ((error == 0) && (VTOC(vp)->c_flag & C_HARDLINK)) { + if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) { hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid); } FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); @@ -2611,7 +2914,7 @@ hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) } /* Obtain the root vnode so we can skip over it. */ - skipvp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, kHFSRootFolderID, 0, 0); + skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0); } #endif /* QUOTA */ @@ -2846,9 +3149,9 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); struct filefork *fp; - HFSPlusVolumeHeader *volumeHeader; + HFSPlusVolumeHeader *volumeHeader, *altVH; int retval; - struct buf *bp; + struct buf *bp, *alt_bp; int i; daddr64_t priIDSector; int critical; @@ -2869,42 +3172,72 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) return EINVAL; } + bp = NULL; + alt_bp = NULL; + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &bp); if (retval) { - if (bp) - buf_brelse(bp); - - hfs_end_transaction(hfsmp); - - printf("HFS: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); - return (retval); - } - - if (hfsmp->jnl) { - journal_modify_block_start(hfsmp->jnl, bp); + printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); + goto err_exit; } volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); /* - * Sanity check what we just read. + * Sanity check what we just read. If it's bad, try the alternate + * instead. */ signature = SWAP_BE16 (volumeHeader->signature); hfsversion = SWAP_BE16 (volumeHeader->version); if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) { -#if 1 - panic("HFS: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d", + printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n", vcb->vcbVN, signature, hfsversion, - SWAP_BE32 (volumeHeader->blockSize)); -#endif - printf("HFS: corrupt VH blk (%s)\n", vcb->vcbVN); - buf_brelse(bp); - return (EIO); + SWAP_BE32 (volumeHeader->blockSize), + hfsmp->hfs_alt_id_sector ? "; trying alternate" : ""); + hfs_mark_volume_inconsistent(hfsmp); + + if (hfsmp->hfs_alt_id_sector) { + retval = buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp); + if (retval) { + printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN); + goto err_exit; + } + + altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)); + signature = SWAP_BE16(altVH->signature); + hfsversion = SWAP_BE16(altVH->version); + + if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || + (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) || + (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) { + printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n", + vcb->vcbVN, signature, hfsversion, + SWAP_BE32(altVH->blockSize)); + retval = EIO; + goto err_exit; + } + + /* The alternate is plausible, so use it. */ + bcopy(altVH, volumeHeader, kMDBSize); + buf_brelse(alt_bp); + alt_bp = NULL; + } else { + /* No alternate VH, nothing more we can do. */ + retval = EIO; + goto err_exit; + } + } + + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp); } /* @@ -3067,8 +3400,6 @@ done: /* If requested, flush out the alternate volume header */ if (altflush && hfsmp->hfs_alt_id_sector) { - struct buf *alt_bp = NULL; - if (buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { @@ -3106,6 +3437,14 @@ done: hfs_end_transaction(hfsmp); return (retval); + +err_exit: + if (alt_bp) + buf_brelse(alt_bp); + if (bp) + buf_brelse(bp); + hfs_end_transaction(hfsmp); + return retval; } @@ -3133,10 +3472,11 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) u_int32_t phys_sectorsize; daddr64_t prev_alt_sector; daddr_t bitmapblks; - int lockflags; + int lockflags = 0; int error; int64_t oldBitmapSize; Boolean usedExtendFileC = false; + int transaction_begun = 0; devvp = hfsmp->hfs_devvp; vcb = HFSTOVCB(hfsmp); @@ -3210,12 +3550,27 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) addblks = newblkcnt - vcb->totalBlocks; printf("hfs_extendfs: growing %s by %d blocks\n", vcb->vcbVN, addblks); + + HFS_MOUNT_LOCK(hfsmp, TRUE); + if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + error = EALREADY; + goto out; + } + hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + + /* Invalidate the current free extent cache */ + invalidate_free_extent_cache(hfsmp); + /* * Enclose changes inside a transaction. */ if (hfs_start_transaction(hfsmp) != 0) { - return (EINVAL); + error = EINVAL; + goto out; } + transaction_begun = 1; /* * Note: we take the attributes lock in case we have an attribute data vnode @@ -3408,9 +3763,10 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } } - /* - * TODO: Adjust the size of the metadata zone based on new volume size? + /* + * Update the metadata zone size based on current volume size */ + hfs_metadatazone_init(hfsmp); /* * Adjust the size of hfsmp->hfs_attrdata_vp @@ -3444,9 +3800,16 @@ out: we should reset the allocLimit field. If it changed, it will get updated; if not, it will remain the same. */ + HFS_MOUNT_LOCK(hfsmp, TRUE); + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; hfsmp->allocLimit = vcb->totalBlocks; - hfs_systemfile_unlock(hfsmp, lockflags); - hfs_end_transaction(hfsmp); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + } return (error); } @@ -3466,17 +3829,18 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) u_int32_t reclaimblks = 0; int lockflags = 0; int transaction_begun = 0; + Boolean updateFreeBlocks = false; int error; - lck_mtx_lock(&hfsmp->hfs_mutex); + HFS_MOUNT_LOCK(hfsmp, TRUE); if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { - lck_mtx_unlock(&hfsmp->hfs_mutex); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); return (EALREADY); } hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; hfsmp->hfs_resize_filesmoved = 0; hfsmp->hfs_resize_totalfiles = 0; - lck_mtx_unlock(&hfsmp->hfs_mutex); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); /* * - Journaled HFS Plus volumes only. @@ -3491,24 +3855,32 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) newblkcnt = newsize / hfsmp->blockSize; reclaimblks = hfsmp->totalBlocks - newblkcnt; + if (hfs_resize_debug) { + printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1)); + printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks); + } + /* Make sure new size is valid. */ if ((newsize < HFS_MIN_SIZE) || (newsize >= oldsize) || (newsize % hfsmp->hfs_logical_block_size) || (newsize % hfsmp->hfs_physical_block_size)) { - printf ("hfs_truncatefs: invalid size\n"); + printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); error = EINVAL; goto out; } - /* Make sure there's enough space to work with. */ + /* Make sure that the file system has enough free blocks reclaim */ if (reclaimblks >= hfs_freeblks(hfsmp, 1)) { - printf("hfs_truncatefs: insufficient space (need %u blocks; have %u blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1)); + printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1)); error = ENOSPC; goto out; } + /* Invalidate the current free extent cache */ + invalidate_free_extent_cache(hfsmp); + /* Start with a clean journal. */ - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); if (hfs_start_transaction(hfsmp) != 0) { error = EINVAL; @@ -3525,17 +3897,34 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) * in the allocation blocks beyond (i.e. the blocks we're trying to * truncate away. */ - lck_mtx_lock(&hfsmp->hfs_mutex); + HFS_MOUNT_LOCK(hfsmp, TRUE); if (hfsmp->blockSize == 512) hfsmp->allocLimit = newblkcnt - 2; else hfsmp->allocLimit = newblkcnt - 1; + /* + * Update the volume free block count to reflect the total number + * of free blocks that will exist after a successful resize. + * Relocation of extents will result in no net change in the total + * free space on the disk. Therefore the code that allocates + * space for new extent and deallocates the old extent explicitly + * prevents updating the volume free block count. It will also + * prevent false disk full error when the number of blocks in + * an extent being relocated is more than the free blocks that + * will exist after the volume is resized. + */ hfsmp->freeBlocks -= reclaimblks; - lck_mtx_unlock(&hfsmp->hfs_mutex); - + updateFreeBlocks = true; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + + /* + * Update the metadata zone size, and, if required, disable it + */ + hfs_metadatazone_init(hfsmp); + /* * Look for files that have blocks at or beyond the location of the - * new alternate volume header. + * new alternate volume header */ if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) { /* @@ -3546,8 +3935,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) transaction_begun = 0; /* Attempt to reclaim some space. */ - if (hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context) != 0) { - printf("hfs_truncatefs: couldn't reclaim space on %s\n", hfsmp->vcbVN); + error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context); + if (error != 0) { + printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error); error = ENOSPC; goto out; } @@ -3558,8 +3948,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) transaction_begun = 1; /* Check if we're clear now. */ - if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) { - printf("hfs_truncatefs: didn't reclaim enough space on %s\n", hfsmp->vcbVN); + error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks); + if (error != 0) { + printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error); error = EAGAIN; /* tell client to try again */ goto out; } @@ -3596,14 +3987,16 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) * since this block will be outside of the truncated file system! */ if (hfsmp->hfs_alt_id_sector) { - if (buf_meta_bread(hfsmp->hfs_devvp, + error = buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), - hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) { - + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (error == 0) { bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize); (void) VNOP_BWRITE(bp); - } else if (bp) { - buf_brelse(bp); + } else { + if (bp) { + buf_brelse(bp); + } } bp = NULL; } @@ -3623,10 +4016,6 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) if (error) panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); - /* - * TODO: Adjust the size of the metadata zone based on new volume size? - */ - /* * Adjust the size of hfsmp->hfs_attrdata_vp */ @@ -3648,22 +4037,27 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } out: - if (error) - hfsmp->freeBlocks += reclaimblks; - lck_mtx_lock(&hfsmp->hfs_mutex); + if (error && (updateFreeBlocks == true)) + hfsmp->freeBlocks += reclaimblks; hfsmp->allocLimit = hfsmp->totalBlocks; if (hfsmp->nextAllocation >= hfsmp->allocLimit) hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; - lck_mtx_unlock(&hfsmp->hfs_mutex); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + /* On error, reset the metadata zone for original volume size */ + if (error && (updateFreeBlocks == true)) { + hfs_metadatazone_init(hfsmp); + } if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); } if (transaction_begun) { hfs_end_transaction(hfsmp); - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); + /* Just to be sure, sync all data to the disk */ + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); } return (error); @@ -3740,18 +4134,6 @@ hfs_copy_extent( if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread()) panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp); - /* - * Wait for any in-progress writes to this vnode to complete, so that we'll - * be copying consistent bits. (Otherwise, it's possible that an async - * write will complete to the old extent after we read from it. That - * could lead to corruption.) - */ - err = vnode_waitforwrites(vp, 0, 0, 0, "hfs_copy_extent"); - if (err) { - printf("hfs_copy_extent: Error %d from vnode_waitforwrites\n", err); - return err; - } - /* * Determine the I/O size to use * @@ -3772,7 +4154,7 @@ hfs_copy_extent( srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; while (resid > 0) { - ioSize = MIN(bufferSize, resid); + ioSize = MIN(bufferSize, (size_t) resid); ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size; /* Prepare the buffer for reading */ @@ -3797,7 +4179,7 @@ hfs_copy_extent( buf_setcount(bp, ioSize); buf_setblkno(bp, destSector); buf_setlblkno(bp, destSector); - if (journal_uses_fua(hfsmp->jnl)) + if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl)) buf_markfua(bp); /* Do the write */ @@ -3820,7 +4202,7 @@ hfs_copy_extent( kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize); /* Make sure all writes have been flushed to disk. */ - if (!journal_uses_fua(hfsmp->jnl)) { + if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) { err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); if (err) { printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err); @@ -3835,8 +4217,15 @@ hfs_copy_extent( } +static int +hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state) +{ + bcopy(state, record, sizeof(HFSPlusExtentRecord)); + return 0; +} + /* - * Reclaim space at the end of a volume, used by a given system file. + * Reclaim space at the end of a volume, used by a given file. * * This routine attempts to move any extent which contains allocation blocks * at or after "startblk." A separate transaction is used to do the move. @@ -3845,109 +4234,199 @@ hfs_copy_extent( * of a transaction have their physical block numbers invalidated so they will * eventually be written to their new locations. * - * This routine can be used to move overflow extents for the allocation file. - * * Inputs: * hfsmp The volume being resized. * startblk Blocks >= this allocation block need to be moved. * locks Which locks need to be taken for the given system file. * vp The vnode for the system file. * + * The caller of this function, hfs_reclaimspace(), grabs cnode lock + * for non-system files before calling this function. + * * Outputs: - * moved Set to true if any extents were moved. + * blks_moved Total number of allocation blocks moved by this routine. */ static int -hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state) -{ - bcopy(state, record, sizeof(HFSPlusExtentRecord)); - return 0; -} -static int -hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, Boolean *moved, vfs_context_t context) +hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, u_int32_t *blks_moved, vfs_context_t context) { int error; int lockflags; int i; u_long datablks; - u_long block; + u_long end_block; u_int32_t oldStartBlock; u_int32_t newStartBlock; - u_int32_t blockCount; + u_int32_t oldBlockCount; + u_int32_t newBlockCount; struct filefork *fp; - + struct cnode *cp; + int is_sysfile; + int took_truncate_lock = 0; + struct BTreeIterator *iterator = NULL; + u_int8_t forktype; + u_int32_t fileID; + u_int32_t alloc_flags; + /* If there is no vnode for this file, then there's nothing to do. */ if (vp == NULL) return 0; - /* printf("hfs_reclaim_sys_file: %.*s\n", VTOC(vp)->c_desc.cd_namelen, VTOC(vp)->c_desc.cd_nameptr); */ + cp = VTOC(vp); + fileID = cp->c_cnid; + is_sysfile = vnode_issystem(vp); + forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0; + + /* Flush all the buffer cache blocks and cluster pages associated with + * this vnode. + * + * If the current vnode is a system vnode, all the buffer cache blocks + * associated with it should already be sync'ed to the disk as part of + * journal flush in hfs_truncatefs(). Normally there should not be + * buffer cache blocks for regular files, but for objects like symlinks, + * we can have buffer cache blocks associated with the vnode. Therefore + * we call buf_flushdirtyblks() always. Resource fork data for directory + * hard links are directly written using buffer cache for device vnode, + * which should also be sync'ed as part of journal flush in hfs_truncatefs(). + * + * Flushing cluster pages should be the normal case for regular files, + * and really should not do anything for system files. But just to be + * sure that all blocks associated with this vnode is sync'ed to the + * disk, we call both buffer cache and cluster layer functions. + */ + buf_flushdirtyblks(vp, MNT_NOWAIT, 0, "hfs_reclaim_file"); + if (!is_sysfile) { + /* The caller grabs cnode lock for non-system files only, therefore + * we unlock only non-system files before calling cluster layer. + */ + hfs_unlock(cp); + hfs_lock_truncate(cp, TRUE); + took_truncate_lock = 1; + } + (void) cluster_push(vp, 0); + if (!is_sysfile) { + error = hfs_lock(cp, HFS_FORCE_LOCK); + if (error) { + hfs_unlock_truncate(cp, TRUE); + return error; + } + + /* If the file no longer exists, nothing left to do */ + if (cp->c_flag & C_NOEXISTS) { + hfs_unlock_truncate(cp, TRUE); + return 0; + } + } + + /* Wait for any in-progress writes to this vnode to complete, so that we'll + * be copying consistent bits. (Otherwise, it's possible that an async + * write will complete to the old extent after we read from it. That + * could lead to corruption.) + */ + error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file"); + if (error) { + printf("hfs_reclaim_file: Error %d from vnode_waitforwrites\n", error); + return error; + } + + if (hfs_resize_debug) { + printf("hfs_reclaim_file: Start relocating %sfork for fileid=%u name=%.*s\n", (forktype ? "rsrc" : "data"), fileID, cp->c_desc.cd_namelen, cp->c_desc.cd_nameptr); + } + /* We always need the allocation bitmap and extents B-tree */ locks |= SFL_BITMAP | SFL_EXTENTS; error = hfs_start_transaction(hfsmp); if (error) { - printf("hfs_reclaim_sys_file: hfs_start_transaction returned %d\n", error); + printf("hfs_reclaim_file: hfs_start_transaction returned %d\n", error); + if (took_truncate_lock) { + hfs_unlock_truncate(cp, TRUE); + } return error; } lockflags = hfs_systemfile_lock(hfsmp, locks, HFS_EXCLUSIVE_LOCK); fp = VTOF(vp); datablks = 0; + *blks_moved = 0; /* Relocate non-overflow extents */ for (i = 0; i < kHFSPlusExtentDensity; ++i) { if (fp->ff_extents[i].blockCount == 0) break; oldStartBlock = fp->ff_extents[i].startBlock; - blockCount = fp->ff_extents[i].blockCount; - datablks += blockCount; - block = oldStartBlock + blockCount; - if (block > startblk) { - error = BlockAllocate(hfsmp, 1, blockCount, blockCount, true, true, &newStartBlock, &blockCount); - if (error) { - printf("hfs_reclaim_sys_file: BlockAllocate returned %d\n", error); - goto fail; - } - if (blockCount != fp->ff_extents[i].blockCount) { - printf("hfs_reclaim_sys_file: new blockCount=%u, original blockCount=%u", blockCount, fp->ff_extents[i].blockCount); - goto free_fail; + oldBlockCount = fp->ff_extents[i].blockCount; + datablks += oldBlockCount; + end_block = oldStartBlock + oldBlockCount; + /* Check if the file overlaps the target space */ + if (end_block > startblk) { + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; } - error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, blockCount, context); + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); if (error) { - printf("hfs_reclaim_sys_file: hfs_copy_extent returned %d\n", error); - goto free_fail; + if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) { + /* Try allocating again using the metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + } + if (error) { + printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); + goto fail; + } else { + if (hfs_resize_debug) { + printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount); + } + } } - fp->ff_extents[i].startBlock = newStartBlock; - VTOC(vp)->c_flag |= C_MODIFIED; - *moved = true; - error = BlockDeallocate(hfsmp, oldStartBlock, blockCount); + + /* Copy data from old location to new location */ + error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context); if (error) { - /* TODO: Mark volume inconsistent? */ - printf("hfs_reclaim_sys_file: BlockDeallocate returned %d\n", error); + printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); + if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) { + hfs_mark_volume_inconsistent(hfsmp); + } goto fail; } - error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + fp->ff_extents[i].startBlock = newStartBlock; + cp->c_flag |= C_MODIFIED; + *blks_moved += newBlockCount; + + /* Deallocate the old extent */ + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); if (error) { - /* TODO: Mark volume inconsistent? */ - printf("hfs_reclaim_sys_file: hfs_flushvolumeheader returned %d\n", error); + printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); goto fail; } + + /* If this is a system file, sync the volume header on disk */ + if (is_sysfile) { + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) { + printf("hfs_reclaim_file: hfs_flushvolumeheader returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; + } + } + + if (hfs_resize_debug) { + printf ("hfs_reclaim_file: Relocated %u:(%u,%u) to %u:(%u,%u)\n", i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); + } } } /* Relocate overflow extents (if any) */ if (i == kHFSPlusExtentDensity && fp->ff_blocks > datablks) { - struct BTreeIterator *iterator = NULL; struct FSBufferDescriptor btdata; HFSPlusExtentRecord record; HFSPlusExtentKey *key; FCB *fcb; - u_int32_t fileID; - u_int8_t forktype; + int overflow_count = 0; - forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0; - fileID = VTOC(vp)->c_cnid; if (kmem_alloc(kernel_map, (vm_offset_t*) &iterator, sizeof(*iterator))) { - printf("hfs_reclaim_sys_file: kmem_alloc failed!\n"); + printf("hfs_reclaim_file: kmem_alloc failed!\n"); error = ENOMEM; goto fail; } @@ -3968,40 +4447,59 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); while (error == 0) { /* Stop when we encounter a different file or fork. */ - if ((key->fileID != fileID) || - (key->forkType != forktype)) { + if ((key->fileID != fileID) || + (key->forkType != forktype)) { break; } + + /* Just track the overflow extent record number for debugging... */ + if (hfs_resize_debug) { + overflow_count++; + } + /* * Check if the file overlaps target space. */ for (i = 0; i < kHFSPlusExtentDensity; ++i) { if (record[i].blockCount == 0) { - goto overflow_done; + goto fail; } oldStartBlock = record[i].startBlock; - blockCount = record[i].blockCount; - block = oldStartBlock + blockCount; - if (block > startblk) { - error = BlockAllocate(hfsmp, 1, blockCount, blockCount, true, true, &newStartBlock, &blockCount); - if (error) { - printf("hfs_reclaim_sys_file: BlockAllocate returned %d\n", error); - goto overflow_done; + oldBlockCount = record[i].blockCount; + end_block = oldStartBlock + oldBlockCount; + if (end_block > startblk) { + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; } - if (blockCount != record[i].blockCount) { - printf("hfs_reclaim_sys_file: new blockCount=%u, original blockCount=%u", blockCount, fp->ff_extents[i].blockCount); - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - goto free_fail; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) { + /* Try allocating again using the metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + } + if (error) { + printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); + goto fail; + } else { + if (hfs_resize_debug) { + printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount); + } + } } - error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, blockCount, context); + error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context); if (error) { - printf("hfs_reclaim_sys_file: hfs_copy_extent returned %d\n", error); - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - goto free_fail; + printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) { + hfs_mark_volume_inconsistent(hfsmp); + } + goto fail; } record[i].startBlock = newStartBlock; - VTOC(vp)->c_flag |= C_MODIFIED; - *moved = true; + cp->c_flag |= C_MODIFIED; + *blks_moved += newBlockCount; + /* * NOTE: To support relocating overflow extents of the * allocation file, we must update the BTree record BEFORE @@ -4012,15 +4510,18 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, */ error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record); if (error) { - /* TODO: Mark volume inconsistent? */ - printf("hfs_reclaim_sys_file: BTUpdateRecord returned %d\n", error); - goto overflow_done; + printf("hfs_reclaim_file: BTUpdateRecord returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; } - error = BlockDeallocate(hfsmp, oldStartBlock, blockCount); + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); if (error) { - /* TODO: Mark volume inconsistent? */ - printf("hfs_reclaim_sys_file: BlockDeallocate returned %d\n", error); - goto overflow_done; + printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error); + hfs_mark_volume_inconsistent(hfsmp); + goto fail; + } + if (hfs_resize_debug) { + printf ("hfs_reclaim_file: Relocated overflow#%d %u:(%u,%u) to %u:(%u,%u)\n", overflow_count, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); } } } @@ -4031,26 +4532,29 @@ hfs_reclaim_sys_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, break; } } -overflow_done: - kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - if (error) { - goto fail; - } } - hfs_systemfile_unlock(hfsmp, lockflags); - error = hfs_end_transaction(hfsmp); - if (error) { - printf("hfs_reclaim_sys_file: hfs_end_transaction returned %d\n", error); +fail: + if (iterator) { + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); } - return error; - -free_fail: - (void) BlockDeallocate(hfsmp, newStartBlock, blockCount); -fail: (void) hfs_systemfile_unlock(hfsmp, lockflags); + + if ((*blks_moved != 0) && (is_sysfile == false)) { + (void) hfs_update(vp, MNT_WAIT); + } + (void) hfs_end_transaction(hfsmp); + + if (took_truncate_lock) { + hfs_unlock_truncate(cp, TRUE); + } + + if (hfs_resize_debug) { + printf("hfs_reclaim_file: Finished relocating %sfork for fileid=%u (error=%d)\n", (forktype ? "rsrc" : "data"), fileID, error); + } + return error; } @@ -4116,6 +4620,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) { int error; int lockflags; + u_int32_t oldStartBlock; u_int32_t newStartBlock; u_int32_t oldBlockCount; u_int32_t newBlockCount; @@ -4134,7 +4639,9 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize; /* TODO: Allow the journal to change size based on the new volume size. */ - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, true, &newStartBlock, &newBlockCount); + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + &newStartBlock, &newBlockCount); if (error) { printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error); goto fail; @@ -4144,7 +4651,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) goto free_fail; } - error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount); + error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); if (error) { printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error); goto free_fail; @@ -4156,6 +4663,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error); goto free_fail; } + oldStartBlock = journal_fork.cf_extents[0].startBlock; journal_fork.cf_size = newBlockCount * hfsmp->blockSize; journal_fork.cf_extents[0].startBlock = newStartBlock; journal_fork.cf_extents[0].blockCount = newBlockCount; @@ -4187,13 +4695,19 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error); } + if (!error && hfs_resize_debug) { + printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + } return error; free_fail: - (void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount); + (void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); fail: hfs_systemfile_unlock(hfsmp, lockflags); (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error); + } return error; } @@ -4208,6 +4722,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) { int error; int lockflags; + u_int32_t oldBlock; u_int32_t newBlock; u_int32_t blockCount; struct cat_desc jib_desc; @@ -4222,7 +4737,9 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) } lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - error = BlockAllocate(hfsmp, 1, 1, 1, true, true, &newBlock, &blockCount); + error = BlockAllocate(hfsmp, 1, 1, 1, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + &newBlock, &blockCount); if (error) { printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); goto fail; @@ -4231,7 +4748,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); goto free_fail; } - error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1); + error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); if (error) { printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); goto free_fail; @@ -4271,6 +4788,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error); goto fail; } + oldBlock = jib_fork.cf_extents[0].startBlock; jib_fork.cf_size = hfsmp->blockSize; jib_fork.cf_extents[0].startBlock = newBlock; jib_fork.cf_extents[0].blockCount = 1; @@ -4294,26 +4812,37 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) if (error) { printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); } - error = journal_flush(hfsmp->jnl); + error = hfs_journal_flush(hfsmp); if (error) { printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); } + + if (!error && hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount); + } return error; free_fail: - (void) BlockDeallocate(hfsmp, newBlock, blockCount); + (void) BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); fail: hfs_systemfile_unlock(hfsmp, lockflags); (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error); + } return error; } /* * Reclaim space at the end of a file system. + * + * Inputs - + * startblk - start block of the space being reclaimed + * reclaimblks - number of allocation blocks to reclaim */ static int -hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vfs_context_t context) +hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context) { struct vnode *vp = NULL; FCB *fcb; @@ -4325,46 +4854,54 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf size_t cnidbufsize; int filecnt = 0; int maxfilecnt; - u_long block; - u_long datablks; - u_long rsrcblks; - u_long blkstomove = 0; + u_int32_t block; int lockflags; - int i; + int i, j; int error; int lastprogress = 0; - Boolean system_file_moved = false; + u_int32_t blks_moved = 0; + u_int32_t total_blks_moved = 0; + Boolean need_relocate; /* Relocate extents of the Allocation file if they're in the way. */ - error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &system_file_moved, context); + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &blks_moved, context); if (error) { printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error); return error; } + total_blks_moved += blks_moved; + /* Relocate extents of the Extents B-tree if they're in the way. */ - error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &system_file_moved, context); + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &blks_moved, context); if (error) { printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error); return error; } + total_blks_moved += blks_moved; + /* Relocate extents of the Catalog B-tree if they're in the way. */ - error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &system_file_moved, context); + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &blks_moved, context); if (error) { printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error); return error; } + total_blks_moved += blks_moved; + /* Relocate extents of the Attributes B-tree if they're in the way. */ - error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &system_file_moved, context); + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &blks_moved, context); if (error) { printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error); return error; } + total_blks_moved += blks_moved; + /* Relocate extents of the Startup File if there is one and they're in the way. */ - error = hfs_reclaim_sys_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &system_file_moved, context); + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &blks_moved, context); if (error) { printf("hfs_reclaimspace: reclaim startup file returned %d\n", error); return error; } + total_blks_moved += blks_moved; /* * We need to make sure the alternate volume header gets flushed if we moved @@ -4372,12 +4909,13 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf * shrinking the size of the volume, or else the journal code will panic * with an invalid (too large) block number. * - * Note that system_file_moved will be set if ANY extent was moved, even + * Note that total_blks_moved will be set if ANY extent was moved, even * if it was just an overflow extent. In this case, the journal_flush isn't * strictly required, but shouldn't hurt. */ - if (system_file_moved) - journal_flush(hfsmp->jnl); + if (total_blks_moved) { + hfs_journal_flush(hfsmp); + } if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) { error = hfs_reclaim_journal_file(hfsmp, context); @@ -4397,7 +4935,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf /* For now move a maximum of 250,000 files. */ maxfilecnt = MIN(hfsmp->hfs_filecount, 250000); - maxfilecnt = MIN((u_long)maxfilecnt, reclaimblks); + maxfilecnt = MIN((u_int32_t)maxfilecnt, reclaimblks); cnidbufsize = maxfilecnt * sizeof(cnid_t); if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) { return (ENOMEM); @@ -4408,6 +4946,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf } saved_next_allocation = hfsmp->nextAllocation; + /* Always try allocating new blocks after the metadata zone */ HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start); fcb = VTOF(hfsmp->hfs_catalog_vp); @@ -4426,7 +4965,8 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf } /* * Iterate over all the catalog records looking for files - * that overlap into the space we're trying to free up. + * that overlap into the space we're trying to free up and + * the total number of blocks that will require relocation. */ for (filecnt = 0; filecnt < maxfilecnt; ) { error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); @@ -4439,58 +4979,64 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf if (filerec.recordType != kHFSPlusFileRecord) { continue; } - datablks = rsrcblks = 0; - /* - * Check if either fork overlaps target space. - */ + + need_relocate = false; + /* Check if data fork overlaps the target space */ for (i = 0; i < kHFSPlusExtentDensity; ++i) { - if (filerec.dataFork.extents[i].blockCount != 0) { - datablks += filerec.dataFork.extents[i].blockCount; - block = filerec.dataFork.extents[i].startBlock + - filerec.dataFork.extents[i].blockCount; - if (block >= startblk) { - if ((filerec.fileID == hfsmp->hfs_jnlfileid) || - (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) { - printf("hfs_reclaimspace: cannot move active journal\n"); - error = EPERM; - goto end_iteration; - } - cnidbufp[filecnt++] = filerec.fileID; - blkstomove += filerec.dataFork.totalBlocks; - break; - } + if (filerec.dataFork.extents[i].blockCount == 0) { + break; } - if (filerec.resourceFork.extents[i].blockCount != 0) { - rsrcblks += filerec.resourceFork.extents[i].blockCount; - block = filerec.resourceFork.extents[i].startBlock + - filerec.resourceFork.extents[i].blockCount; - if (block >= startblk) { - cnidbufp[filecnt++] = filerec.fileID; - blkstomove += filerec.resourceFork.totalBlocks; - break; + block = filerec.dataFork.extents[i].startBlock + + filerec.dataFork.extents[i].blockCount; + if (block >= startblk) { + if ((filerec.fileID == hfsmp->hfs_jnlfileid) || + (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) { + printf("hfs_reclaimspace: cannot move active journal\n"); + error = EPERM; + goto end_iteration; } + need_relocate = true; + goto save_fileid; } } - /* - * Check for any overflow extents that overlap. - */ - if (i == kHFSPlusExtentDensity) { - if (filerec.dataFork.totalBlocks > datablks) { - if (hfs_overlapped_overflow_extents(hfsmp, startblk, datablks, filerec.fileID, 0)) { - cnidbufp[filecnt++] = filerec.fileID; - blkstomove += filerec.dataFork.totalBlocks; - } - } else if (filerec.resourceFork.totalBlocks > rsrcblks) { - if (hfs_overlapped_overflow_extents(hfsmp, startblk, rsrcblks, filerec.fileID, 1)) { - cnidbufp[filecnt++] = filerec.fileID; - blkstomove += filerec.resourceFork.totalBlocks; - } + + /* Check if resource fork overlaps the target space */ + for (j = 0; j < kHFSPlusExtentDensity; ++j) { + if (filerec.resourceFork.extents[j].blockCount == 0) { + break; + } + block = filerec.resourceFork.extents[j].startBlock + + filerec.resourceFork.extents[j].blockCount; + if (block >= startblk) { + need_relocate = true; + goto save_fileid; + } + } + + /* Check if any forks' overflow extents overlap the target space */ + if ((i == kHFSPlusExtentDensity) || (j == kHFSPlusExtentDensity)) { + if (hfs_overlapped_overflow_extents(hfsmp, startblk, filerec.fileID)) { + need_relocate = true; + goto save_fileid; + } + } + +save_fileid: + if (need_relocate == true) { + cnidbufp[filecnt++] = filerec.fileID; + if (hfs_resize_debug) { + printf ("hfs_reclaimspace: Will relocate extents for fileID=%u\n", filerec.fileID); } } } end_iteration: - if (filecnt == 0 && !system_file_moved) { + /* If no regular file was found to be relocated and + * no system file was moved, we probably do not have + * enough space to relocate the system files, or + * something else went wrong. + */ + if ((filecnt == 0) && (total_blks_moved == 0)) { printf("hfs_reclaimspace: no files moved\n"); error = ENOSPC; } @@ -4499,66 +5045,52 @@ end_iteration: if (error || filecnt == 0) goto out; - /* - * Double check space requirements to make sure - * there is enough space to relocate any files - * that reside in the reclaim area. - * - * Blocks To Move -------------- - * | | | - * V V V - * ------------------------------------------------------------------------ - * | | / /// // | - * | | / /// // | - * | | / /// // | - * ------------------------------------------------------------------------ - * - * <------------------- New Total Blocks ------------------><-- Reclaim --> - * - * <------------------------ Original Total Blocks -----------------------> - * - */ - if (blkstomove >= hfs_freeblks(hfsmp, 1)) { - printf("hfs_truncatefs: insufficient space (need %lu blocks; have %u blocks)\n", blkstomove, hfs_freeblks(hfsmp, 1)); - error = ENOSPC; - goto out; - } hfsmp->hfs_resize_filesmoved = 0; hfsmp->hfs_resize_totalfiles = filecnt; /* Now move any files that are in the way. */ for (i = 0; i < filecnt; ++i) { - struct vnode * rvp; - struct cnode * cp; + struct vnode *rvp; + struct cnode *cp; + struct filefork *datafork; if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0) continue; + + cp = VTOC(vp); + datafork = VTOF(vp); - /* Relocating directory hard links is not supported, so we - * punt (see radar 6217026). */ - cp = VTOC(vp); - if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) { - printf("hfs_reclaimspace: unable to relocate directory hard link %d\n", cp->c_cnid); - error = EINVAL; - goto out; - } - - /* Relocate any data fork blocks. */ - if (VTOF(vp) && VTOF(vp)->ff_blocks > 0) { - error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc()); + /* Relocating directory hard links is not supported, so we punt (see radar 6217026). */ + if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) { + printf("hfs_reclaimspace: Unable to relocate directory hard link id=%d\n", cp->c_cnid); + error = EINVAL; + goto out; + } + + /* Relocate any overlapping data fork blocks. */ + if (datafork && datafork->ff_blocks > 0) { + error = hfs_reclaim_file(hfsmp, vp, startblk, 0, &blks_moved, context); + if (error) { + printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error); + break; + } + total_blks_moved += blks_moved; } - if (error) - break; - /* Relocate any resource fork blocks. */ - if ((cp->c_blocks - (VTOF(vp) ? VTOF((vp))->ff_blocks : 0)) > 0) { - error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); - if (error) + /* Relocate any overlapping resource fork blocks. */ + if ((cp->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) { + error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE); + if (error) { + printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", cnidbufp[i], error); break; - error = hfs_relocate(rvp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc()); + } + error = hfs_reclaim_file(hfsmp, rvp, startblk, 0, &blks_moved, context); VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT; - if (error) + if (error) { + printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error); break; + } + total_blks_moved += blks_moved; } hfs_unlock(cp); vnode_put(vp); @@ -4583,8 +5115,8 @@ end_iteration: vp = NULL; } if (hfsmp->hfs_resize_filesmoved != 0) { - printf("hfs_reclaimspace: relocated %d files on \"%s\"\n", - (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN); + printf("hfs_reclaimspace: relocated %u blocks from %d files on \"%s\"\n", + total_blks_moved, (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN); } out: kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); @@ -4602,32 +5134,34 @@ out: /* - * Check if there are any overflow extents that overlap. + * Check if there are any overflow data or resource fork extents that overlap + * into the disk space that is being reclaimed. + * + * Output - + * 1 - One of the overflow extents need to be relocated + * 0 - No overflow extents need to be relocated, or there was an error */ static int -hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t catblks, u_int32_t fileID, int rsrcfork) +hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID) { struct BTreeIterator * iterator = NULL; struct FSBufferDescriptor btdata; HFSPlusExtentRecord extrec; HFSPlusExtentKey *extkeyptr; FCB *fcb; - u_int32_t block; - u_int8_t forktype; int overlapped = 0; int i; int error; - forktype = rsrcfork ? 0xFF : 0; if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { - return (0); + return 0; } bzero(iterator, sizeof(*iterator)); extkeyptr = (HFSPlusExtentKey *)&iterator->key; extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; - extkeyptr->forkType = forktype; + extkeyptr->forkType = 0; extkeyptr->fileID = fileID; - extkeyptr->startBlock = catblks; + extkeyptr->startBlock = 0; btdata.bufferAddress = &extrec; btdata.itemSize = sizeof(extrec); @@ -4635,32 +5169,41 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in fcb = VTOF(hfsmp->hfs_extents_vp); + /* This will position the iterator just before the first overflow + * extent record for given fileID. It will always return btNotFound, + * so we special case the error code. + */ error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + if (error && (error != btNotFound)) { + goto out; + } + + /* BTIterateRecord() might return error if the btree is empty, and + * therefore we return that the extent does not overflow to the caller + */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); while (error == 0) { /* Stop when we encounter a different file. */ - if ((extkeyptr->fileID != fileID) || - (extkeyptr->forkType != forktype)) { + if (extkeyptr->fileID != fileID) { break; } - /* - * Check if the file overlaps target space. - */ + /* Check if any of the forks exist in the target space. */ for (i = 0; i < kHFSPlusExtentDensity; ++i) { if (extrec[i].blockCount == 0) { break; } - block = extrec[i].startBlock + extrec[i].blockCount; - if (block >= startblk) { + if ((extrec[i].startBlock + extrec[i].blockCount) >= startblk) { overlapped = 1; - break; + goto out; } } /* Look for more records. */ error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); } +out: kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); - return (overlapped); + return overlapped; } @@ -4684,6 +5227,28 @@ hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) } +/* + * Creates a UUID from a unique "name" in the HFS UUID Name space. + * See version 3 UUID. + */ +static void +hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result) +{ + MD5_CTX md5c; + uint8_t rawUUID[8]; + + ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; + ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; + + MD5Init( &md5c ); + MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); + MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); + MD5Final( result, &md5c ); + + result[6] = 0x30 | ( result[6] & 0x0F ); + result[8] = 0x80 | ( result[8] & 0x3F ); +} + /* * Get file system attributes. */ @@ -4695,9 +5260,9 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); - u_long freeCNIDs; + u_int32_t freeCNIDs; - freeCNIDs = (u_long)0xFFFFFFFF - (u_long)hfsmp->vcbNxtCNID; + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); @@ -4746,7 +5311,12 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_2TB_FILESIZE | VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION; +#else VOL_CAP_FMT_PATH_FROM_ID; +#endif } cap->capabilities[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | @@ -4782,7 +5352,12 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_FMT_2TB_FILESIZE | VOL_CAP_FMT_OPENDENYMODES | VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION; +#else VOL_CAP_FMT_PATH_FROM_ID; +#endif cap->valid[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | @@ -4860,6 +5435,10 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN); VFSATTR_SET_SUPPORTED(fsap, f_vol_name); } + if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) { + hfs_getvoluuid(hfsmp, fsap->f_uuid); + VFSATTR_SET_SUPPORTED(fsap, f_uuid); + } return (0); } @@ -4994,28 +5573,23 @@ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; MarkVCBDirty(hfsmp); } - /* Log information to ASL log */ - fslog_fs_corrupt(hfsmp->hfs_mp); - printf("HFS: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); + if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) { + /* Log information to ASL log */ + fslog_fs_corrupt(hfsmp->hfs_mp); + printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); + } HFS_MOUNT_UNLOCK(hfsmp, TRUE); } /* Replay the journal on the device node provided. Returns zero if * journal replay succeeded or no journal was supposed to be replayed. */ -static int hfs_journal_replay(const char *devnode, vfs_context_t context) +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context) { int retval = 0; - struct vnode *devvp = NULL; struct mount *mp = NULL; struct hfs_mount_args *args = NULL; - /* Lookup vnode for given raw device path */ - retval = vnode_open(devnode, FREAD|FWRITE, 0, 0, &devvp, NULL); - if (retval) { - goto out; - } - /* Replay allowed only on raw devices */ if (!vnode_ischr(devvp)) { retval = EINVAL; @@ -5024,10 +5598,18 @@ static int hfs_journal_replay(const char *devnode, vfs_context_t context) /* Create dummy mount structures */ MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK); + if (mp == NULL) { + retval = ENOMEM; + goto out; + } bzero(mp, sizeof(struct mount)); mount_lock_init(mp); MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK); + if (args == NULL) { + retval = ENOMEM; + goto out; + } bzero(args, sizeof(struct hfs_mount_args)); retval = hfs_mountfs(devvp, mp, args, 1, context); @@ -5041,9 +5623,6 @@ out: if (args) { FREE(args, M_TEMP); } - if (devvp) { - vnode_close(devvp, FREAD|FWRITE, NULL); - } return retval; }