+ vfsp = vfs_statfs(mp);
+
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
+
+ lck_rw_lock_exclusive(&hfsmp->hfs_insync);
+
+ // flush things before we get started, to try to prevent
+ // dirty data from being paged out while we're frozen.
+ // note: can't do this after taking the lock as it will
+ // deadlock against ourselves.
+ vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
+ hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
+
+ // DO NOT call hfs_journal_flush() because that takes a
+ // shared lock on the global exclusive lock!
+ journal_flush(hfsmp->jnl, TRUE);
+
+ // We don't need to iterate over all vnodes; we just need to
+ // wait for writes to the system files and the device vnode.
+ //
+ // Now that journal flush waits for all metadata blocks to
+ // be written out, waiting for btree writes is probably no
+ // longer required.
+ if (HFSTOVCB(hfsmp)->extentsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
+ if (HFSTOVCB(hfsmp)->catalogRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
+ if (HFSTOVCB(hfsmp)->allocationsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
+ if (hfsmp->hfs_attribute_vp)
+ vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
+ vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
+
+ hfsmp->hfs_freezing_proc = current_proc();
+
+ return (0);
+ }
+
+ case F_THAW_FS: {
+ vfsp = vfs_statfs(vnode_mount(vp));
+ if (kauth_cred_getuid(cred) != vfsp->f_owner &&
+ !kauth_cred_issuser(cred))
+ return (EACCES);
+
+ // if we're not the one who froze the fs then we
+ // can't thaw it.
+ if (hfsmp->hfs_freezing_proc != current_proc()) {
+ return EPERM;
+ }
+
+ // NOTE: if you add code here, also go check the
+ // code that "thaws" the fs in hfs_vnop_close()
+ //
+ hfsmp->hfs_freezing_proc = NULL;
+ hfs_unlock_global (hfsmp);
+ lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
+
+ return (0);
+ }
+
+ case HFS_BULKACCESS_FSCTL: {
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_access_t);
+ } else {
+ size = sizeof(struct user32_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_EXT_BULKACCESS_FSCTL: {
+ int size;
+
+ if (hfsmp->hfs_flags & HFS_STANDARD) {
+ return EINVAL;
+ }
+
+ if (is64bit) {
+ size = sizeof(struct user64_ext_access_t);
+ } else {
+ size = sizeof(struct user32_ext_access_t);
+ }
+
+ return do_bulk_access_check(hfsmp, vp, ap, size, context);
+ }
+
+ case HFS_SET_XATTREXTENTS_STATE: {
+ int state;
+
+ if (ap->a_data == NULL) {
+ return (EINVAL);
+ }
+
+ state = *(int *)ap->a_data;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Super-user can enable or disable extent-based extended
+ * attribute support on a volume.
+ * Note: Starting with Mac OS X 10.7, extent-based extended attributes
+ * are enabled by default, so any change will be transient only
+ * until the volume is remounted.
+ */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return (EPERM);
+ }
+ if (state == 0 || state == 1)
+ return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
+ else
+ return (EINVAL);
+ }
+
+ case F_SETSTATICCONTENT: {
+ int error;
+ int enable_static = 0;
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Note that even though ap->a_data is of type caddr_t,
+ * the fcntl layer at the syscall handler will pass in NULL
+ * or 1 depending on what the argument supplied to the fcntl
+ * was. So it is in fact correct to check the ap->a_data
+ * argument for zero or non-zero value when deciding whether or not
+ * to enable the static bit in the cnode.
+ */
+ enable_static = 1;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ if (enable_static) {
+ cp->c_flag |= C_SSD_STATIC;
+ }
+ else {
+ cp->c_flag &= ~C_SSD_STATIC;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
+
+ case F_SET_GREEDY_MODE: {
+ int error;
+ int enable_greedy_mode = 0;
+ struct cnode *cp = NULL;
+ /*
+ * lock the cnode, decorate the cnode flag, and bail out.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Note that even though ap->a_data is of type caddr_t,
+ * the fcntl layer at the syscall handler will pass in NULL
+ * or 1 depending on what the argument supplied to the fcntl
+ * was. So it is in fact correct to check the ap->a_data
+ * argument for zero or non-zero value when deciding whether or not
+ * to enable the greedy mode bit in the cnode.
+ */
+ enable_greedy_mode = 1;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+ cp = VTOC(vp);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ if (enable_greedy_mode) {
+ cp->c_flag |= C_SSD_GREEDY_MODE;
+ }
+ else {
+ cp->c_flag &= ~C_SSD_GREEDY_MODE;
+ }
+ hfs_unlock (cp);
+ }
+ return error;
+ }
+
+ case F_MAKECOMPRESSED: {
+ int error = 0;
+ uint32_t gen_counter;
+ struct cnode *cp = NULL;
+ int reset_decmp = 0;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return EROFS;
+ }
+
+ /*
+ * acquire & lock the cnode.
+ * VFS should have already authenticated the caller for us.
+ */
+
+ if (ap->a_data) {
+ /*
+ * Cast the pointer into a uint32_t so we can extract the
+ * supplied generation counter.
+ */
+ gen_counter = *((uint32_t*)ap->a_data);
+ }
+ else {
+ return EINVAL;
+ }
+
+#if HFS_COMPRESSION
+ cp = VTOC(vp);
+ /* Grab truncate lock first; we may truncate the file */
+ hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+ error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error) {
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return error;
+ }
+
+ /* Are there any other usecounts/FDs? */
+ if (vnode_isinuse(vp, 1)) {
+ hfs_unlock(cp);
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ return EBUSY;
+ }
+
+
+ /* now we have the cnode locked down; Validate arguments */
+ if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
+ /* EINVAL if you are trying to manipulate an IMMUTABLE file */
+ hfs_unlock(cp);
+ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
+ return EINVAL;
+ }
+
+ if ((hfs_get_gencount (cp)) == gen_counter) {
+ /*
+ * OK, the gen_counter matched. Go for it:
+ * Toggle state bits, truncate file, and suppress mtime update
+ */
+ reset_decmp = 1;
+ cp->c_bsdflags |= UF_COMPRESSED;
+
+ error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context);
+ }
+ else {
+ error = ESTALE;
+ }
+
+ /* Unlock the cnode before executing decmpfs; it may need to get an EA */
+ hfs_unlock(cp);
+
+ /*
+ * Reset the decmp state while still holding the truncate lock. We need to
+ * serialize here against a listxattr on this node which may occur at any
+ * time.
+ *
+ * Even if '0/skiplock' is passed as the 2nd argument to hfs_file_is_compressed,
+ * that will still potentially require getting the com.apple.decmpfs EA. If the
+ * EA is required, then we can't hold the cnode lock, because the getxattr call is
+ * generic (through VFS), and can't pass along any info telling it that we're already
+ * holding it (the lock). If we don't serialize, then we risk listxattr stopping
+ * and trying to fill in the hfs_file_is_compressed info during the callback
+ * operation, which will result in deadlock against the b-tree node.
+ *
+ * So, to serialize against listxattr (which will grab buf_t meta references on
+ * the b-tree blocks), we hold the truncate lock as we're manipulating the
+ * decmpfs payload.
+ */
+ if ((reset_decmp) && (error == 0)) {
+ decmpfs_cnode *dp = VTOCMP (vp);
+ if (dp != NULL) {
+ decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
+ }
+
+ /* Initialize the decmpfs node as needed */
+ (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
+ }
+
+ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
+
+#endif
+ return error;
+ }
+
+ case F_SETBACKINGSTORE: {
+
+ int error = 0;
+
+ /*
+ * See comment in F_SETSTATICCONTENT re: using
+ * a null check for a_data
+ */
+ if (ap->a_data) {
+ error = hfs_set_backingstore (vp, 1);
+ }
+ else {
+ error = hfs_set_backingstore (vp, 0);
+ }
+
+ return error;
+ }
+
+ case F_GETPATH_MTMINFO: {
+ int error = 0;
+
+ int *data = (int*) ap->a_data;
+
+ /* Ask if this is a backingstore vnode */
+ error = hfs_is_backingstore (vp, data);
+
+ return error;
+ }
+
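+ /* Synchronously flush the file; the TRUE argument below asks
+ * hfs_fsync to perform a full sync to the device.
+ */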
+ case F_FULLFSYNC: {
+ int error;
+
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
+ hfs_unlock(VTOC(vp));
+ }
+
+ return error;
+ }
+
+ case F_CHKCLEAN: {
+ register struct cnode *cp;
+ int error;
+
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ if (error == 0) {
+ cp = VTOC(vp);
+ /*
+ * used by regression tests to determine if
+ * all the dirty pages (via write) have been cleaned
+ * after a call to 'fsync'.
+ */
+ error = is_file_clean(vp, VTOF(vp)->ff_size);
+ hfs_unlock(cp);
+ }
+ return (error);
+ }
+
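+ /* Issue an advisory read-ahead for the byte range supplied in the
+ * radvisory argument, after validating it against the file's EOF.
+ */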
+ case F_RDADVISE: {
+ register struct radvisory *ra;
+ struct filefork *fp;
+ int error;
+
+ if (!vnode_isreg(vp))
+ return EINVAL;
+
+ ra = (struct radvisory *)(ap->a_data);
+ fp = VTOF(vp);
+
+ /* Protect against a size change. */
+ hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+#if HFS_COMPRESSION
+ if (compressed && (uncompressed_size == -1)) {
+ /* fetching the uncompressed size failed above, so return the error */
+ error = decmpfs_error;
+ } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
+ (!compressed && (ra->ra_offset >= fp->ff_size))) {
+ error = EFBIG;
+ }
+#else /* HFS_COMPRESSION */
+ if (ra->ra_offset >= fp->ff_size) {
+ error = EFBIG;
+ }
+#endif /* HFS_COMPRESSION */
+ else {
+ error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
+ }
+
+ hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
+ return (error);
+ }
+
+ case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
+ {
+ if (is64bit) {
+ *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ else {
+ *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
+ }
+ return 0;
+ }
+
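+ /* The Spotlight and free-space notification fsctls below simply read
+ * or update the corresponding fields in the hfsmount structure.
+ */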
+ case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
+ break;
+
+ case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
+ break;
+
+ case HFS_FSCTL_GET_VERY_LOW_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
+ break;
+
+ case HFS_FSCTL_SET_VERY_LOW_DISK:
+ if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_GET_LOW_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
+ break;
+
+ case HFS_FSCTL_SET_LOW_DISK:
+ if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
+ || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
+
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_FSCTL_GET_DESIRED_DISK:
+ *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
+ break;
+
+ case HFS_FSCTL_SET_DESIRED_DISK:
+ if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
+ return EINVAL;
+ }
+
+ hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
+ break;
+
+ case HFS_VOLUME_STATUS:
+ *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
+ break;
+
+ case HFS_SET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
+ return(EACCES); /* must be superuser or owner of filesystem */
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ hfs_lock_mount (hfsmp);
+ bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
+ hfs_unlock_mount (hfsmp);
+ (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ break;
+
+ case HFS_GET_BOOT_INFO:
+ if (!vnode_isvroot(vp))
+ return(EINVAL);
+ hfs_lock_mount (hfsmp);
+ bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
+ hfs_unlock_mount(hfsmp);
+ break;
+
+ case HFS_MARK_BOOT_CORRUPT:
+ /* Mark the boot volume corrupt by setting
+ * kHFSVolumeInconsistentBit in the volume header. This will
+ * force fsck_hfs on next mount.
+ */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+
+ /* Allowed only on the root vnode of the boot volume */
+ if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
+ !vnode_isvroot(vp)) {
+ return EINVAL;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+ printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
+ hfs_mark_volume_inconsistent(hfsmp);
+ break;
+
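+ /* Report the journal's device-relative start offset and size, or
+ * zeroes if the volume has no journal.
+ */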
+ case HFS_FSCTL_GET_JOURNAL_INFO:
+ jip = (struct hfs_journal_info*)ap->a_data;
+
+ if (vp == NULLVP)
+ return EINVAL;
+
+ if (hfsmp->jnl == NULL) {
+ jnl_start = 0;
+ jnl_size = 0;
+ } else {
+ jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
+ jnl_size = (off_t)hfsmp->jnl_size;
+ }
+
+ jip->jstart = jnl_start;
+ jip->jsize = jnl_size;
+ break;
+
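+ /* Set or clear the cnode's always-zero-fill flag based on the
+ * caller-supplied int.
+ */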
+ case HFS_SET_ALWAYS_ZEROFILL: {
+ struct cnode *cp = VTOC(vp);
+
+ if (*(int *)ap->a_data) {
+ cp->c_flag |= C_ALWAYS_ZEROFILL;
+ } else {
+ cp->c_flag &= ~C_ALWAYS_ZEROFILL;
+ }
+ break;
+ }
+
+ case HFS_DISABLE_METAZONE: {
+ /* Only root can disable metadata zone */
+ if (!kauth_cred_issuser(kauth_cred_get())) {
+ return EACCES;
+ }
+ if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+ return (EROFS);
+ }
+
+ /* Disable metadata zone now */
+ (void) hfs_metadatazone_init(hfsmp, true);
+ printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
+ break;
+ }
+
+ default:
+ return (ENOTTY);
+ }
+
+ return 0;
+}
+
+/*
+ * select
+ */
+int
+hfs_vnop_select(__unused struct vnop_select_args *ap)
+/*
+ struct vnop_select_args {
+ vnode_t a_vp;
+ int a_which;
+ int a_fflags;
+ void *a_wql;
+ vfs_context_t a_context;
+ };
+*/
+{
+ /*
+ * We should really check to see if I/O is possible.
+ */
+ return (1);
+}
+
+/*
+ * Converts a logical block number to a physical block number, and optionally returns
+ * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
+ * The physical block number is based on the device block size, currently 512.
+ * The block run is returned in logical blocks, and is the REMAINING number of blocks.
+ */
+int
+hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
+{
+ struct filefork *fp = VTOF(vp);
+ struct hfsmount *hfsmp = VTOHFS(vp);
+ int retval = E_NONE;
+ u_int32_t logBlockSize;
+ size_t bytesContAvail = 0;
+ off_t blockposition;
+ int lockExtBtree;
+ int lockflags = 0;
+
+ /*
+ * Check for underlying vnode requests and ensure that logical
+ * to physical mapping is requested.
+ */
+ if (vpp != NULL)
+ *vpp = hfsmp->hfs_devvp;
+ if (bnp == NULL)
+ return (0);
+
+ logBlockSize = GetLogicalBlockSize(vp);
+ blockposition = (off_t)bn * logBlockSize;
+
+ lockExtBtree = overflow_extents(fp);
+
+ if (lockExtBtree)
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
+
+ retval = MacToVFSError(
+ MapFileBlockC (HFSTOVCB(hfsmp),
+ (FCB*)fp,
+ MAXPHYSIO,
+ blockposition,
+ bnp,
+ &bytesContAvail));
+
+ if (lockExtBtree)
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (retval == E_NONE) {
+ /* Figure out how many read ahead blocks there are */
+ if (runp != NULL) {
+ if (can_cluster(logBlockSize)) {
+ /* Make sure this result never goes negative: */
+ *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
+ } else {
+ *runp = 0;
+ }
+ }
+ }
+ return (retval);
+}
+
+/*
+ * Convert logical block number to file offset.
+ */
+int
+hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
+/*
+ struct vnop_blktooff_args {
+ vnode_t a_vp;
+ daddr64_t a_lblkno;
+ off_t *a_offset;
+ };
+*/
+{
+ if (ap->a_vp == NULL)
+ return (EINVAL);
+ *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
+
+ return(0);
+}
+
+/*
+ * Convert file offset to logical block number.
+ */
+int
+hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
+/*
+ struct vnop_offtoblk_args {
+ vnode_t a_vp;
+ off_t a_offset;
+ daddr64_t *a_lblkno;
+ };
+*/
+{
+ if (ap->a_vp == NULL)
+ return (EINVAL);
+ *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
+
+ return(0);
+}
+
+/*
+ * Map file offset to physical block number.
+ *
+ * If this function is called for write operation, and if the file
+ * had virtual blocks allocated (delayed allocation), real blocks
+ * are allocated by calling ExtendFileC().
+ *
+ * If this function is called for read operation, and if the file
+ * had virtual blocks allocated (delayed allocation), no change
+ * to the size of file is done, and if required, rangelist is
+ * searched for mapping.
+ *
+ * System file cnodes are expected to be locked (shared or exclusive).
+ */
+int
+hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
+/*
+ struct vnop_blockmap_args {
+ vnode_t a_vp;
+ off_t a_foffset;
+ size_t a_size;
+ daddr64_t *a_bpn;
+ size_t *a_run;
+ void *a_poff;
+ int a_flags;
+ vfs_context_t a_context;
+ };
+*/
+{
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ struct hfsmount *hfsmp;
+ size_t bytesContAvail = 0;
+ int retval = E_NONE;
+ int syslocks = 0;
+ int lockflags = 0;
+ struct rl_entry *invalid_range;
+ enum rl_overlaptype overlaptype;
+ int started_tr = 0;
+ int tooklock = 0;
+
+#if HFS_COMPRESSION
+ if (VNODE_IS_RSRC(vp)) {
+ /* allow blockmaps to the resource fork */
+ } else {
+ if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
+ int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
+ switch(state) {
+ case FILE_IS_COMPRESSED:
+ return ENOTSUP;
+ case FILE_IS_CONVERTING:
+ /* if FILE_IS_CONVERTING, we allow blockmap */
+ break;
+ default:
+ printf("invalid state %d for compressed file\n", state);
+ /* fall through */
+ }
+ }
+ }
+#endif /* HFS_COMPRESSION */
+
+ /* Do not allow blockmap operation on a directory */
+ if (vnode_isdir(vp)) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * Check for underlying vnode requests and ensure that logical
+ * to physical mapping is requested.
+ */
+ if (ap->a_bpn == NULL)
+ return (0);
+
+ if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
+ if (VTOC(vp)->c_lockowner != current_thread()) {
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ tooklock = 1;
+ }
+ }
+ hfsmp = VTOHFS(vp);
+ cp = VTOC(vp);
+ fp = VTOF(vp);
+
+retry:
+ /* Check virtual blocks only when performing write operation */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto exit;
+ } else {
+ started_tr = 1;
+ }
+ syslocks = SFL_EXTENTS | SFL_BITMAP;
+
+ } else if (overflow_extents(fp)) {
+ syslocks = SFL_EXTENTS;
+ }
+
+ if (syslocks)
+ lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
+
+ /*
+ * Check for any delayed allocations.
+ */
+ if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
+ int64_t actbytes;
+ u_int32_t loanedBlocks;
+
+ //
+ // Make sure we have a transaction. It's possible
+ // that we came in and fp->ff_unallocblocks was zero
+ // but during the time we blocked acquiring the extents
+ // btree, ff_unallocblocks became non-zero and so we
+ // will need to start a transaction.
+ //
+ if (started_tr == 0) {
+ if (syslocks) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ syslocks = 0;
+ }
+ goto retry;
+ }
+
+ /*
+ * Note: ExtendFileC will release any blocks on loan and
+ * acquire real blocks. So we ask to extend by zero bytes
+ * since ExtendFileC will account for the virtual blocks.
+ */
+
+ loanedBlocks = fp->ff_unallocblocks;
+ retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
+ kEFAllMask | kEFNoClumpMask, &actbytes);
+
+ if (retval) {
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
+
+ hfs_lock_mount (hfsmp);
+ hfsmp->loanedBlocks += loanedBlocks;
+ hfs_unlock_mount (hfsmp);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ cp->c_flag |= C_MODIFIED;
+ if (started_tr) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+ goto exit;
+ }
+ }
+
+ retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
+ ap->a_bpn, &bytesContAvail);
+ if (syslocks) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ syslocks = 0;
+ }
+
+ if (started_tr) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ hfs_end_transaction(hfsmp);
+ started_tr = 0;
+ }
+ if (retval) {
+ /* On write, always return error because virtual blocks, if any,
+ * should have been allocated in ExtendFileC(). We do not
+ * allocate virtual blocks on read, therefore return error
+ * only if no virtual blocks are allocated. Otherwise we search
+ * rangelist for zero-fills
+ */
+ if ((MacToVFSError(retval) != ERANGE) ||
+ (ap->a_flags & VNODE_WRITE) ||
+ ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
+ goto exit;
+ }
+
+ /* Validate if the start offset is within logical file size */
+ if (ap->a_foffset >= fp->ff_size) {
+ goto exit;
+ }
+
+ /*
+ * At this point, we have encountered a failure during
+ * MapFileBlockC that resulted in ERANGE, and we are not servicing
+ * a write, and there are borrowed blocks.
+ *
+ * However, the cluster layer will not call blockmap for
+ * blocks that are borrowed and in-cache. We have to assume that
+ * because we observed ERANGE being emitted from MapFileBlockC, this
+ * extent range is not valid on-disk. So we treat this as a
+ * mapping that needs to be zero-filled prior to reading.
+ *
+ * Note that under certain circumstances (such as non-contiguous
+ * userland VM mappings in the calling process), cluster_io
+ * may be forced to split a large I/O driven by hfs_vnop_write
+ * into multiple sub-I/Os that necessitate an RMW cycle. If this is
+ * the case here, then we have already removed the invalid range list
+ * mapping prior to getting to this blockmap call, so we should not
+ * search the invalid rangelist for this byte range.
+ */
+
+ bytesContAvail = fp->ff_size - ap->a_foffset;
+ /*
+ * Clip the contiguous available bytes to, at most, the allowable
+ * maximum or the amount requested.
+ */
+
+ if (bytesContAvail > ap->a_size) {
+ bytesContAvail = ap->a_size;
+ }
+
+ *ap->a_bpn = (daddr64_t) -1;
+ retval = 0;
+
+ goto exit;
+ }
+
+ /* MapFileBlockC() found a valid extent in the filefork. Search the
+ * mapping information further for invalid file ranges
+ */
+ overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
+ ap->a_foffset + (off_t)bytesContAvail - 1,
+ &invalid_range);
+ if (overlaptype != RL_NOOVERLAP) {
+ switch(overlaptype) {
+ case RL_MATCHINGOVERLAP:
+ case RL_OVERLAPCONTAINSRANGE:
+ case RL_OVERLAPSTARTSBEFORE:
+ /* There's no valid block for this byte offset */
+ *ap->a_bpn = (daddr64_t)-1;
+ /* There's no point limiting the amount to be returned
+ * if the invalid range that was hit extends all the way
+ * to the EOF (i.e. there's no valid bytes between the
+ * end of this range and the file's EOF):
+ */
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+ break;
+
+ case RL_OVERLAPISCONTAINED:
+ case RL_OVERLAPENDSAFTER:
+ /* The range of interest hits an invalid block before the end: */
+ if (invalid_range->rl_start == ap->a_foffset) {
+ /* There's actually no valid information to be had starting here: */
+ *ap->a_bpn = (daddr64_t)-1;
+ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
+ ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
+ bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
+ }
+ } else {
+ bytesContAvail = invalid_range->rl_start - ap->a_foffset;
+ }
+ break;
+
+ case RL_NOOVERLAP:
+ break;
+ } /* end switch */
+ if (bytesContAvail > ap->a_size)
+ bytesContAvail = ap->a_size;
+ }
+
+exit:
+ if (retval == 0) {
+ if (ap->a_run)
+ *ap->a_run = bytesContAvail;
+
+ if (ap->a_poff)
+ *(int *)ap->a_poff = 0;
+ }
+
+ if (tooklock)
+ hfs_unlock(cp);
+
+ return (MacToVFSError(retval));
+}
+
+/*
+ * Prepare and issue the I/O.
+ * buf_strategy knows how to deal
+ * with requests that require
+ * fragmented I/Os.
+ */
+int
+hfs_vnop_strategy(struct vnop_strategy_args *ap)
+{
+ buf_t bp = ap->a_bp;
+ vnode_t vp = buf_vnode(bp);
+ int error = 0;
+
+ /* Mark buffer as containing static data if cnode flag set */
+ if (VTOC(vp)->c_flag & C_SSD_STATIC) {
+ buf_markstatic(bp);
+ }
+
+ /* Mark buffer for greedy mode writes if cnode flag set */
+ if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
+ bufattr_markgreedymode((bufattr_t)(&bp->b_attr));
+ }
+
+#if CONFIG_PROTECT
+ cnode_t *cp = NULL;
+
+ if ((cp = cp_get_protected_cnode(vp)) != NULL) {
+ /*
+ * We rely upon the truncate lock to protect the
+ * CP cache key from getting tossed prior to our IO finishing here.
+ * Nearly all cluster io calls to manipulate file payload from HFS
+ * take the truncate lock before calling into the cluster
+ * layer to ensure the file size does not change, or that they
+ * have exclusive right to change the EOF of the file.
+ * That same guarantee protects us here since the code that
+ * deals with CP lock events must now take the truncate lock
+ * before doing anything.
+ *
+ * There is one exception here: the VM swapfile IO, because HFS will
+ * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
+ * swapfile code only, without holding the truncate lock. This is because
+ * individual swapfiles are maintained at fixed-length sizes by the VM code.
+ * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
+ * create our own UPL and thus take the truncate lock before calling
+ * into the cluster layer. In that case, however, we are not concerned
+ * with the CP blob being wiped out in the middle of the IO
+ * because there isn't anything to toss; the VM swapfile key stays
+ * in-core as long as the file is open.
+ *
+ * NB:
+ * For filesystem resize, we may not have access to the underlying
+ * file's cache key for whatever reason (device may be locked). However,
+ * we do not need it since we are going to use the temporary HFS-wide resize key
+ * which is generated once we start relocating file content. If this file's I/O
+ * should be done using the resize key, it will have been supplied already, so
+ * do not attach the file's cp blob to the buffer.
+ */
+ if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
+ buf_setcpaddr(bp, cp->c_cpentry);
+ }
+ }
+#endif /* CONFIG_PROTECT */
+
+ error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
+
+ return error;
+}
+
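+/*
+ * Clear the cnode's modified and touch-time flags instead of doing a
+ * full hfs_update(); used by truncate paths that are asked to skip
+ * on-disk updates.
+ */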
+static int
+hfs_minorupdate(struct vnode *vp) {
+ struct cnode *cp = VTOC(vp);
+ cp->c_flag &= ~C_MODIFIED;
+ cp->c_touch_acctime = 0;
+ cp->c_touch_chgtime = 0;
+ cp->c_touch_modtime = 0;
+
+ return 0;
+}
+
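+/*
+ * Truncate or extend the fork backing 'vp' to 'length' bytes within a
+ * single transaction; hfs_truncate() below loops over this routine in
+ * bounded chunks so transactions do not grow too large.
+ */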
+int
+do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
+{
+ register struct cnode *cp = VTOC(vp);
+ struct filefork *fp = VTOF(vp);
+ struct proc *p = vfs_context_proc(context);
+ kauth_cred_t cred = vfs_context_ucred(context);
+ int retval;
+ off_t bytesToAdd;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize;
+ struct hfsmount *hfsmp;
+ int lockflags;
+ int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
+ int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+
+ if (length < 0)
+ return (EINVAL);
+
+ /* This should only happen with a corrupt filesystem */
+ if ((off_t)fp->ff_size < 0)
+ return (EINVAL);
+
+ if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
+ return (EFBIG);
+
+ hfsmp = VTOHFS(vp);
+
+ retval = E_NONE;
+
+ /* Files that are changing size are not hot file candidates. */
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ /*
+ * We cannot just check if fp->ff_size == length (as an optimization)
+ * since there may be extra physical blocks that also need truncation.
+ */
+#if QUOTA
+ if ((retval = hfs_getinoquota(cp)))
+ return(retval);
+#endif /* QUOTA */
+
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of ff_size is 0, length will be at least 1.
+ */
+ if (length > (off_t)fp->ff_size) {
+#if QUOTA
+ retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+#endif /* QUOTA */
+ /*
+ * If we don't have enough physical space then
+ * we need to extend the physical size.
+ */
+ if (length > filebytes) {
+ int eflags;
+ u_int32_t blockHint = 0;
+
+ /* All or nothing and don't round up to clumpsize. */
+ eflags = kEFAllMask | kEFNoClumpMask;
+
+ if (cred && suser(cred, NULL) != 0)
+ eflags |= kEFReserveMask; /* keep a reserve */
+
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (filebytes == 0 &&
+ hfsmp->hfs_flags & HFS_METADATA_ZONE &&
+ hfs_virtualmetafile(cp)) {
+ eflags |= kEFMetadataMask;
+ blockHint = hfsmp->hfs_metazone_start;
+ }
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ bytesToAdd = length - filebytes;
+ retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
+ (FCB*)fp,
+ bytesToAdd,
+ blockHint,
+ eflags,
+ &actualBytesAdded));
+
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (actualBytesAdded == 0 && retval == E_NONE) {
+ if (length > filebytes)
+ length = filebytes;
+ break;
+ }
+ } /* endwhile */
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (hfsmp->jnl) {
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+
+ hfs_end_transaction(hfsmp);
+
+ if (retval)
+ goto Err_Exit;
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
+ }
+
+ if (!(flags & IO_NOZEROFILL)) {
+ if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
+ struct rl_entry *invalid_range;
+ off_t zero_limit;
+
+ zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
+ if (length < zero_limit) zero_limit = length;
+
+ if (length > (off_t)fp->ff_size) {
+ struct timeval tv;
+
+ /* Extending the file: time to fill out the current last page with zeroes? */
+ if ((fp->ff_size & PAGE_MASK_64) &&
+ (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
+ fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
+
+ /* There's some valid data at the start of the (current) last page
+ of the file, so zero out the remainder of that page to ensure the
+ entire page contains valid data. Since there is no invalid range
+ possible past the (current) eof, there's no need to remove anything
+ from the invalid range list before calling cluster_write(): */
+ hfs_unlock(cp);
+ retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
+ fp->ff_size, (off_t)0,
+ (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ if (retval) goto Err_Exit;
+
+ /* Merely invalidate the remaining area, if necessary: */
+ if (length > zero_limit) {
+ microuptime(&tv);
+ rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ }
+ } else {
+ /* The page containing the (current) eof is invalid: just add the
+ remainder of the page to the invalid list, along with the area
+ being newly allocated:
+ */
+ microuptime(&tv);
+ rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
+ cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
+ };
+ }
+ } else {
+ panic("hfs_truncate: invoked on non-UBC object?!");
+ };
+ }
+ if (suppress_times == 0) {
+ cp->c_touch_modtime = TRUE;
+ }
+ fp->ff_size = length;
+
+ } else { /* Shorten the size of the file */
+
+ if ((off_t)fp->ff_size > length) {
+ /* Any space previously marked as invalid is now irrelevant: */
+ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
+ }
+
+ /*
+ * Account for any unmapped blocks. Note that the new
+ * file length can still end up with unmapped blocks.
+ */
+ if (fp->ff_unallocblocks > 0) {
+ u_int32_t finalblks;
+ u_int32_t loanedBlocks;
+
+ hfs_lock_mount(hfsmp);
+ loanedBlocks = fp->ff_unallocblocks;
+ cp->c_blocks -= loanedBlocks;
+ fp->ff_blocks -= loanedBlocks;
+ fp->ff_unallocblocks = 0;
+
+ hfsmp->loanedBlocks -= loanedBlocks;
+
+ finalblks = (length + blksize - 1) / blksize;
+ if (finalblks > fp->ff_blocks) {
+ /* calculate required unmapped blocks */
+ loanedBlocks = finalblks - fp->ff_blocks;
+ hfsmp->loanedBlocks += loanedBlocks;
+
+ fp->ff_unallocblocks = loanedBlocks;
+ cp->c_blocks += loanedBlocks;
+ fp->ff_blocks += loanedBlocks;
+ }
+ hfs_unlock_mount (hfsmp);
+ }
+
+ /*
+ * For a TBE process the deallocation of the file blocks is
+ * delayed until the file is closed. And hfs_close calls
+ * truncate with the IO_NDELAY flag set. So when IO_NDELAY
+ * isn't set, we make sure this isn't a TBE process.
+ */
+ if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
+#if QUOTA
+ off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
+#endif /* QUOTA */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ if (fp->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
+ FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (hfsmp->jnl) {
+ if (retval == 0) {
+ fp->ff_size = length;
+ }
+ if (skipupdate) {
+ (void) hfs_minorupdate(vp);
+ }
+ else {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+ }
+ hfs_end_transaction(hfsmp);
+
+ filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
+ if (retval)
+ goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
+#endif /* QUOTA */
+ }
+ /*
+ * Only set update flag if the logical length changes & we aren't
+ * suppressing modtime updates.
+ */
+ if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
+ cp->c_touch_modtime = TRUE;
+ }
+ fp->ff_size = length;
+ }
+ if (cp->c_mode & (S_ISUID | S_ISGID)) {
+ if (!vfs_context_issuser(context)) {
+ cp->c_mode &= ~(S_ISUID | S_ISGID);
+ skipupdate = 0;
+ }
+ }
+ if (skipupdate) {
+ retval = hfs_minorupdate(vp);
+ }
+ else {
+ cp->c_touch_chgtime = TRUE; /* status changed */
+ if (suppress_times == 0) {
+ cp->c_touch_modtime = TRUE; /* file data was modified */
+
+ /*
+ * If we are not suppressing the modtime update, then
+ * update the gen count as well.
+ */
+ if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
+ hfs_incr_gencount(cp);
+ }
+ }
+
+ retval = hfs_update(vp, MNT_WAIT);
+ }
+ if (retval) {
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
+ -1, -1, -1, retval, 0);
+ }
+
+Err_Exit:
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
+ (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
+
+ return (retval);
+}
+
+/*
+ * Preparation which must be done prior to deleting the catalog record
+ * of a file or directory. In order to keep the on-disk state as safe as possible,
+ * we remove the catalog entry before releasing the bitmap blocks and the
+ * overflow extent records. However, some work must be done prior to deleting
+ * the catalog record.
+ *
+ * When calling this function, the cnode must exist both in memory and on-disk.
+ * If there are both resource fork and data fork vnodes, this function should
+ * be called on both.
+ */
+
+int
+hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
+
+ struct filefork *fp = VTOF(vp);
+ struct cnode *cp = VTOC(vp);
+#if QUOTA
+ int retval = 0;
+#endif /* QUOTA */
+
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+
+ /*
+ * See the comment below in hfs_truncate for why we need to call
+ * setsize here. Essentially we want to avoid pending IO if we
+ * already know that the blocks are going to be released here.
+ * This function is only called when totally removing all storage for a file, so
+ * we can take a shortcut and immediately call ubc_setsize(0).
+ */
+ ubc_setsize(vp, 0);
+
+ /* This should only happen with a corrupt filesystem */
+ if ((off_t)fp->ff_size < 0)
+ return (EINVAL);
+
+ /*
+ * We cannot just check if fp->ff_size == length (as an optimization)
+ * since there may be extra physical blocks that also need truncation.
+ */
+#if QUOTA
+ if ((retval = hfs_getinoquota(cp))) {
+ return(retval);
+ }
+#endif /* QUOTA */
+
+ /* Wipe out any invalid ranges which have yet to be backed by disk */
+ rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
+
+ /*
+ * Account for any unmapped blocks. Since we're deleting the
+ * entire file, we don't have to worry about just shrinking
+ * to a smaller number of borrowed blocks.
+ */
+ if (fp->ff_unallocblocks > 0) {
+ u_int32_t loanedBlocks;
+
+ hfs_lock_mount (hfsmp);
+ loanedBlocks = fp->ff_unallocblocks;
+ cp->c_blocks -= loanedBlocks;
+ fp->ff_blocks -= loanedBlocks;
+ fp->ff_unallocblocks = 0;
+
+ hfsmp->loanedBlocks -= loanedBlocks;
+
+ hfs_unlock_mount (hfsmp);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Special wrapper around calling TruncateFileC. This function is usable
+ * even when the catalog record does not exist any longer, making it ideal
+ * for use when deleting a file. The simplification here is that we know
+ * that we are releasing all blocks.
+ *
+ * Note that this function may be called when there is no vnode backing
+ * the file fork in question. We may call this from hfs_vnop_inactive
+ * to clear out resource fork data (and may not want to clear out the data
+ * fork yet). As a result, we pointer-check both sets of inputs before
+ * doing anything with them.
+ *
+ * The caller is responsible for saving off a copy of the filefork(s)
+ * embedded within the cnode prior to calling this function. The pointers
+ * supplied as arguments must be valid even if the cnode is no longer valid.
+ */
+
+int
+hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
+ struct filefork *rsrcfork, u_int32_t fileid) {
+
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize = 0;
+ int error = 0;
+ int lockflags;
+
+ blksize = hfsmp->blockSize;
+
+ /* Data Fork */
+ if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
+ fileblocks = datafork->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+
+ while (filebytes > 0) {
+ if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = 0;
+ }
+
+ /* Start a transaction, and wipe out as many blocks as we can in this iteration */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ if (datafork->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(datafork))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (error == 0) {
+ datafork->ff_size = filebytes;
+ }
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+ /* Finish the transaction and start over if necessary */
+ hfs_end_transaction(hfsmp);
+
+ if (error) {
+ break;
+ }
+ }
+ }
+
+ /* Resource fork */
+ if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
+ fileblocks = rsrcfork->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+
+ while (filebytes > 0) {
+ if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = 0;
+ }
+
+ /* Start a transaction, and wipe out as many blocks as we can in this iteration */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ if (rsrcfork->ff_unallocblocks == 0) {
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(rsrcfork))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ }
+ if (error == 0) {
+ rsrcfork->ff_size = filebytes;
+ }
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+
+ /* Finish the transaction and start over if necessary */
+ hfs_end_transaction(hfsmp);
+
+ if (error) {
+ break;
+ }
+ }
+ }
+
+ return error;
+}
+
+
+/*
+ * Truncate a cnode to at most 'length' bytes, freeing (or adding) the
+ * disk blocks.
+ */
+int
+hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
+ int truncateflags, vfs_context_t context)
+{
+ struct filefork *fp = VTOF(vp);
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int blksize, error = 0;
+ struct cnode *cp = VTOC(vp);
+
+ /* Cannot truncate an HFS directory! */
+ if (vnode_isdir(vp)) {
+ return (EISDIR);
+ }
+ /* A swap file cannot change size. */
+ if (vnode_isswap(vp) && (length != 0)) {
+ return (EPERM);
+ }
+
+ blksize = VTOVCB(vp)->blockSize;
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)blksize;
+
+ //
+ // Have to do this here so that we don't wind up with
+ // i/o pending for blocks that are about to be released
+ // if we truncate the file.
+ //
+ // If skipsetsize is set, then the caller is responsible
+ // for the ubc_setsize.
+ //
+ // Even if skipsetsize is set, if the length is zero we
+ // want to call ubc_setsize() because as of Snow Leopard
+ // it will no longer cause any page-ins and it will drop
+ // any dirty pages so that we don't do any i/o that we
+ // don't have to. This also prevents a race where i/o
+ // for truncated blocks may overwrite later data if the
+ // blocks get reallocated to a different file.
+ //
+ if (!skipsetsize || length == 0)
+ ubc_setsize(vp, length);
+
+ // have to loop truncating or growing files that are
+ // really big because otherwise transactions can get
+ // enormous and consume too many kernel resources.
+
+ if (length < filebytes) {
+ while (filebytes > length) {
+ if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes -= HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
+ if (error)
+ break;
+ }
+ } else if (length > filebytes) {
+ while (filebytes < length) {
+ if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
+ filebytes += HFS_BIGFILE_SIZE;
+ } else {
+ filebytes = length;
+ }
+ cp->c_flag |= C_FORCEUPDATE;
+ error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
+ if (error)
+ break;
+ }
+ } else /* Same logical size */ {
+
+ error = do_hfs_truncate(vp, length, flags, truncateflags, context);
+ }
+ /* Files that are changing size are not hot file candidates. */
+ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+ fp->ff_bytesread = 0;
+ }
+
+ return (error);
+}
+
+
+
+/*
+ * Preallocate file storage space.
+ */
+int
+hfs_vnop_allocate(struct vnop_allocate_args /* {
+ vnode_t a_vp;
+ off_t a_length;
+ u_int32_t a_flags;
+ off_t *a_bytesallocated;
+ off_t a_offset;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp;
+ struct filefork *fp;
+ ExtendedVCB *vcb;
+ off_t length = ap->a_length;
+ off_t startingPEOF;
+ off_t moreBytesRequested;
+ off_t actualBytesAdded;
+ off_t filebytes;
+ u_int32_t fileblocks;
+ int retval, retval2;
+ u_int32_t blockHint;
+ u_int32_t extendFlags; /* For call to ExtendFileC */
+ struct hfsmount *hfsmp;
+ kauth_cred_t cred = vfs_context_ucred(ap->a_context);
+ int lockflags;
+ time_t orig_ctime;
+
+ *(ap->a_bytesallocated) = 0;
+
+ if (!vnode_isreg(vp))
+ return (EISDIR);
+ if (length < (off_t)0)
+ return (EINVAL);
+
+ cp = VTOC(vp);
+
+ orig_ctime = VTOC(vp)->c_ctime;
+
+ check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
+
+ hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+ if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ goto Err_Exit;
+ }
+
+ fp = VTOF(vp);
+ hfsmp = VTOHFS(vp);
+ vcb = VTOVCB(vp);
+
+ fileblocks = fp->ff_blocks;
+ filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
+
+ if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Fill in the flags word for the call to Extend the file */
+
+ extendFlags = kEFNoClumpMask;
+ if (ap->a_flags & ALLOCATECONTIG)
+ extendFlags |= kEFContigMask;
+ if (ap->a_flags & ALLOCATEALL)
+ extendFlags |= kEFAllMask;
+ if (cred && suser(cred, NULL) != 0)
+ extendFlags |= kEFReserveMask;
+ if (hfs_virtualmetafile(cp))
+ extendFlags |= kEFMetadataMask;
+
+ retval = E_NONE;
+ blockHint = 0;
+ startingPEOF = filebytes;
+
+ if (ap->a_flags & ALLOCATEFROMPEOF)
+ length += filebytes;
+ else if (ap->a_flags & ALLOCATEFROMVOL)
+ blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
+
+ /* If no changes are necessary, then we're done */
+ if (filebytes == length)
+ goto Std_Exit;
+
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of filebytes is 0, length will be at least 1.
+ */
+ if (length > filebytes) {
+ off_t total_bytes_added = 0, orig_request_size;
+
+ orig_request_size = moreBytesRequested = length - filebytes;
+
+#if QUOTA
+ retval = hfs_chkdq(cp,
+ (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
+ cred, 0);
+ if (retval)
+ goto Err_Exit;
+
+#endif /* QUOTA */
+ /*
+ * Metadata zone checks.
+ */
+ if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+ /*
+ * Allocate Journal and Quota files in metadata zone.
+ */
+ if (hfs_virtualmetafile(cp)) {
+ blockHint = hfsmp->hfs_metazone_start;
+ } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
+ (blockHint <= hfsmp->hfs_metazone_end)) {
+ /*
+ * Move blockHint outside metadata zone.
+ */
+ blockHint = hfsmp->hfs_metazone_end + 1;
+ }
+ }
+
+
+ while ((length > filebytes) && (retval == E_NONE)) {
+ off_t bytesRequested;
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ retval = EINVAL;
+ goto Err_Exit;
+ }
+
+ /* Protect extents b-tree and allocation bitmap */
+ lockflags = SFL_BITMAP;
+ if (overflow_extents(fp))
+ lockflags |= SFL_EXTENTS;
+ lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+ if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
+ bytesRequested = HFS_BIGFILE_SIZE;
+ } else {
+ bytesRequested = moreBytesRequested;
+ }
+
+ if (extendFlags & kEFContigMask) {
+ // if we're on a sparse device, this will force it to do a
+ // full scan to find the space needed.
+ hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
+ }
+
+ retval = MacToVFSError(ExtendFileC(vcb,
+ (FCB*)fp,
+ bytesRequested,
+ blockHint,
+ extendFlags,
+ &actualBytesAdded));
+
+ if (retval == E_NONE) {
+ *(ap->a_bytesallocated) += actualBytesAdded;
+ total_bytes_added += actualBytesAdded;
+ moreBytesRequested -= actualBytesAdded;
+ if (blockHint != 0) {
+ blockHint += actualBytesAdded / vcb->blockSize;
+ }
+ }
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (hfsmp->jnl) {
+ (void) hfs_update(vp, TRUE);
+ (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+ }
+
+ hfs_end_transaction(hfsmp);
+ }
+
+
+ /*
+ * If we get an error and no changes were made then exit;
+ * otherwise we must do the hfs_update to reflect the changes.
+ */
+ if (retval && (startingPEOF == filebytes))
+ goto Err_Exit;
+
+ /*
+ * Adjust actualBytesAdded to be allocation block aligned, not
+ * clump size aligned.
+ * NOTE: What we are reporting does not affect reality
+ * until the file is closed, when we truncate the file to allocation
+ * block size.
+ */
+ if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
+ *(ap->a_bytesallocated) =
+ roundup(orig_request_size, (off_t)vcb->blockSize);
+
+ } else { /* Shorten the size of the file */
+
+ if (fp->ff_size > length) {
+ /*
+ * Any buffers that are past the truncation point need to be
+ * invalidated (to maintain buffer cache consistency).
+ */
+ }
+
+ retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
+ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+
+ /*
+ * If we get an error and no changes were made then exit;
+ * otherwise we must do the hfs_update to reflect the changes.
+ */
+ if (retval && (startingPEOF == filebytes)) goto Err_Exit;
+#if QUOTA
+ /* These are bytesreleased */
+ (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
+#endif /* QUOTA */
+
+ if (fp->ff_size > filebytes) {
+ fp->ff_size = filebytes;
+
+ hfs_unlock(cp);
+ ubc_setsize(vp, fp->ff_size);
+ hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ }
+ }
+
+Std_Exit:
+ cp->c_touch_chgtime = TRUE;
+ cp->c_touch_modtime = TRUE;
+ retval2 = hfs_update(vp, MNT_WAIT);
+
+ if (retval == 0)
+ retval = retval2;
+Err_Exit:
+ hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ hfs_unlock(cp);
+ return (retval);
+}
+
+
+/*
+ * Pagein for HFS filesystem
+ */
+int
+hfs_vnop_pagein(struct vnop_pagein_args *ap)
+/*
+ struct vnop_pagein_args {
+ vnode_t a_vp,