X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3e170ce000f1506b7b5d2c5c7faec85ceabb573d..743345f9a4b36f7e2f9ba37691e70c50baecb56e:/bsd/vfs/vfs_syscalls.c diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index a949a717d..04b382fb9 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1995-2015 Apple Inc. All rights reserved. + * Copyright (c) 1995-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -100,6 +100,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -113,24 +116,29 @@ #include #include +#include #include #include #include +#if ROUTEFS +#include +#endif /* ROUTEFS */ + #if CONFIG_MACF #include #include #endif -#if CONFIG_FSE +#if CONFIG_FSE #define GET_PATH(x) \ - (x) = get_pathbuff(); + (x) = get_pathbuff(); #define RELEASE_PATH(x) \ release_pathbuff(x); -#else +#else #define GET_PATH(x) \ - MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK); #define RELEASE_PATH(x) \ FREE_ZONE((x), MAXPATHLEN, M_NAMEI); #endif /* CONFIG_FSE */ @@ -152,8 +160,8 @@ static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, i static int sync_callback(mount_t, void *); static void sync_thread(void *, __unused wait_result_t); static int sync_async(int); -static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, - user_addr_t bufp, int *sizep, boolean_t is_64_bit, +static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp, int *sizep, boolean_t is_64_bit, boolean_t partial_copy); static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp); @@ -217,21 +225,14 @@ unsigned int vfs_nummntops=0; extern const struct fileops vnops; #if CONFIG_APPLEDOUBLE -extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); +extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); #endif /* CONFIG_APPLEDOUBLE */ -typedef uint32_t vfs_rename_flags_t; -#if CONFIG_SECLUDED_RENAME -enum { - VFS_SECLUDE_RENAME = 0x00000001 -}; -#endif - /* * Virtual File System System Calls */ -#if NFSCLIENT || DEVFS +#if NFSCLIENT || DEVFS || ROUTEFS /* * Private in-kernel mounting spi (NFS only, not exported) */ @@ -251,7 +252,7 @@ kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path, boolean_t did_namei; int error; - NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, + NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); /* @@ -303,7 +304,7 @@ mount(proc_t p, struct mount_args *uap, __unused int32_t *retval) } void -vfs_notify_mount(vnode_t pdvp) +vfs_notify_mount(vnode_t pdvp) { vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL); lock_vnode_and_post(pdvp, NOTE_WRITE); @@ -316,14 +317,14 @@ vfs_notify_mount(vnode_t pdvp) * * Parameters: p Process requesting the mount * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->type Filesystem type * uap->path Path to mount - * uap->data Mount arguments - * uap->mac_p MAC info + * uap->data Mount arguments + * uap->mac_p MAC info * uap->flags Mount flags - * + * * * Returns: 0 Success * !0 Not success @@ -343,7 +344,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 char *labelstr = NULL; int flags = uap->flags; int error; -#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF +#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF boolean_t is_64bit = IS_64BIT_PROCESS(p); #else #pragma unused(p) @@ -358,7 +359,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 /* * Get the vnode to be covered */ - NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, + NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) { @@ -367,7 +368,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 need_nameidone = 1; vp = nd.ni_vp; pvp = nd.ni_dvp; - + #ifdef CONFIG_IMGSRC_ACCESS /* Mounting image source cannot be batched with other operations */ if (flags == MNT_IMGSRC_BY_INDEX) { @@ -414,36 +415,44 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 AUDIT_ARG(fflags, flags); +#if SECURE_KERNEL + if (flags & MNT_UNION) { + /* No union mounts on release kernels */ + error = EPERM; + goto out; + } +#endif + if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) { if (!(flags & MNT_UNION)) { flags |= MNT_UPDATE; } else { - /* + /* * For a union mount on '/', treat it as fresh - * mount instead of update. - * Otherwise, union mouting on '/' used to panic the - * system before, since mnt_vnodecovered was found to - * be NULL for '/' which is required for unionlookup + * mount instead of update. + * Otherwise, union mouting on '/' used to panic the + * system before, since mnt_vnodecovered was found to + * be NULL for '/' which is required for unionlookup * after it gets ENOENT on union mount. */ flags = (flags & ~(MNT_UPDATE)); } -#ifdef SECURE_KERNEL +#if SECURE_KERNEL if ((flags & MNT_RDONLY) == 0) { /* Release kernels are not allowed to mount "/" as rw */ error = EPERM; - goto out; + goto out; } #endif /* * See 7392553 for more details on why this check exists. * Suffice to say: If this check is ON and something tries * to mount the rootFS RW, we'll turn off the codesign - * bitmap optimization. - */ + * bitmap optimization. + */ #if CHECK_CS_VALIDATION_BITMAP if ((flags & MNT_RDONLY) == 0 ) { root_fs_upgrade_try = TRUE; @@ -476,7 +485,7 @@ out: /* * common mount implementation (final stage of mounting) - + * Arguments: * fstypename file system type (ie it's vfs name) * pvp parent of covered vnode @@ -547,13 +556,13 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, * If content protection is enabled, update mounts are not * allowed to turn it off. */ - if ((mp->mnt_flag & MNT_CPROTECT) && + if ((mp->mnt_flag & MNT_CPROTECT) && ((flags & MNT_CPROTECT) == 0)) { error = EINVAL; goto out1; } -#ifdef CONFIG_IMGSRC_ACCESS +#ifdef CONFIG_IMGSRC_ACCESS /* Can't downgrade the backer of the root FS */ if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) && (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) { @@ -673,7 +682,7 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */ vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE); -#if NFSCLIENT || DEVFS +#if NFSCLIENT || DEVFS || ROUTEFS if (kernelmount) mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT; if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) @@ -718,15 +727,16 @@ update: /* * Process device path for local file systems if requested */ - if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) { + if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS && + !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) { if (vfs_context_is64bit(ctx)) { if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) ) - goto out1; + goto out1; fsmountargs += sizeof(devpath); } else { user32_addr_t tmp; if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) ) - goto out1; + goto out1; /* munge into LP64 addr */ devpath = CAST_USER_ADDR_T(tmp); fsmountargs += sizeof(tmp); @@ -778,7 +788,7 @@ update: */ if ( (error = vfs_mountedon(devvp)) ) goto out3; - + if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) { error = EBUSY; goto out3; @@ -816,7 +826,7 @@ update: vnode_getalways(device_vnode); if (suser(vfs_context_ucred(ctx), NULL) && - (error = vnode_authorize(device_vnode, NULL, + (error = vnode_authorize(device_vnode, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) { vnode_put(device_vnode); @@ -854,7 +864,12 @@ update: /* * Mount the filesystem. */ - error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx); + if (internal_flags & KERNEL_MOUNT_SNAPSHOT) { + error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT, + (caddr_t)fsmountargs, 0, ctx); + } else { + error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx); + } if (flags & MNT_UPDATE) { if (mp->mnt_kern_flag & MNTK_WANTRDWR) @@ -924,8 +939,8 @@ update: /* Unmount the filesystem as cdir/rdirs cannot be updated */ goto out4; } - /* - * there is no cleanup code here so I have made it void + /* + * there is no cleanup code here so I have made it void * we need to revisit this */ (void)VFS_START(mp, 0, ctx); @@ -946,7 +961,7 @@ update: VFSATTR_INIT(&vfsattr); VFSATTR_WANTED(&vfsattr, f_capabilities); if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 && - vfs_getattr(mp, &vfsattr, ctx) == 0 && + vfs_getattr(mp, &vfsattr, ctx) == 0 && VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) { if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) { @@ -966,6 +981,11 @@ update: /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */ mp->mnt_kern_flag |= MNTK_PATH_FROM_ID; } + + if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) && + (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) { + mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS; + } } if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) { mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS; @@ -987,7 +1007,7 @@ update: * defaults will have been set, so no reason to bail or care */ vfs_init_io_attributes(device_vnode, mp); - } + } /* Now that mount is setup, notify the listeners */ vfs_notify_mount(pvp); @@ -996,7 +1016,7 @@ update: } else { /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */ if (mp->mnt_vnodelist.tqh_first != NULL) { - panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.", + panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.", mp->mnt_vtable->vfc_name, error); } @@ -1013,7 +1033,7 @@ update: } lck_rw_done(&mp->mnt_rwlock); is_rwlock_locked = FALSE; - + /* * if we get here, we have a mount structure that needs to be freed, * but since the coveredvp hasn't yet been updated to point at it, @@ -1038,8 +1058,8 @@ exit: /* Error condition exits */ out4: (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx); - - /* + + /* * If the mount has been placed on the covered vp, * it may have been discovered by now, so we have * to treat this just like an unmount @@ -1076,7 +1096,7 @@ out1: if (is_rwlock_locked == TRUE) { lck_rw_done(&mp->mnt_rwlock); } - + if (mntalloc) { if (mp->mnt_crossref) mount_dropcrossref(mp, vp, 0); @@ -1097,7 +1117,7 @@ out1: return(error); } -/* +/* * Flush in-core data, check for competing mount attempts, * and set VMOUNT */ @@ -1119,7 +1139,7 @@ prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, cons VATTR_WANTED(&va, va_uid); if ((error = vnode_getattr(vp, &va, ctx)) || (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) && - (!vfs_context_issuser(ctx)))) { + (!vfs_context_issuser(ctx)))) { error = EPERM; goto out; } @@ -1162,7 +1182,7 @@ out: #define IMGSRC_DEBUG(args...) printf(args) #else #define IMGSRC_DEBUG(args...) do { } while(0) -#endif +#endif static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx) @@ -1321,7 +1341,7 @@ mount_begin_update(mount_t mp, vfs_context_t ctx, int flags) * permitted to update it. */ if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) && - (!vfs_context_issuser(ctx))) { + (!vfs_context_issuser(ctx))) { error = EPERM; goto out; } @@ -1340,7 +1360,7 @@ out: return error; } -static void +static void mount_end_update(mount_t mp) { lck_rw_done(&mp->mnt_rwlock); @@ -1365,8 +1385,8 @@ get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp) } static int -relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, - const char *fsname, vfs_context_t ctx, +relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, + const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index) { int error; @@ -1471,7 +1491,7 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, goto out0; } - /* + /* * It can only be moved once. Flag is set under the rwlock, * so we're now safe to proceed. */ @@ -1479,8 +1499,8 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, IMGSRC_DEBUG("Already moved [2]\n"); goto out1; } - - + + IMGSRC_DEBUG("Preparing coveredvp.\n"); /* Mark covered vnode as mount in progress, authorize placing mount on top */ @@ -1489,7 +1509,7 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error); goto out1; } - + IMGSRC_DEBUG("Covered vp OK.\n"); /* Sanity check the name caller has provided */ @@ -1515,9 +1535,9 @@ relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, } } - /* + /* * Place mp on top of vnode, ref the vnode, call checkdirs(), - * and increment the name cache's mount generation + * and increment the name cache's mount generation */ IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n"); @@ -1561,9 +1581,9 @@ out3: mount_unlock(mp); out2: - /* + /* * Placing the mp on the vnode clears VMOUNT, - * so cleanup is different after that point + * so cleanup is different after that point */ if (placed) { /* Rele the vp, clear VMOUNT and v_mountedhere */ @@ -1598,7 +1618,7 @@ enablequotas(struct mount *mp, vfs_context_t ctx) if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) { return; } - /* + /* * Enable filesystem disk quotas if necessary. * We ignore errors as this should not interfere with final mount */ @@ -1619,7 +1639,7 @@ enablequotas(struct mount *mp, vfs_context_t ctx) static int -checkdirs_callback(proc_t p, void * arg) +checkdirs_callback(proc_t p, void * arg) { struct cdirargs * cdrp = (struct cdirargs * )arg; vnode_t olddp = cdrp->olddp; @@ -1728,7 +1748,7 @@ unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval) struct nameidata nd; vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -1758,7 +1778,7 @@ unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval) } int -vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx) +vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx) { mount_t mp; @@ -1794,7 +1814,7 @@ safedounmount(struct mount *mp, int flags, vfs_context_t ctx) } /* - * Skip authorization if the mount is tagged as permissive and + * Skip authorization if the mount is tagged as permissive and * this is not a forced-unmount attempt. */ if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) { @@ -1880,7 +1900,7 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) */ mp->mnt_realrootvp = NULLVP; mount_unlock(mp); - + if (forcedunmount && (flags & MNT_LNOSUB) == 0) { /* * Force unmount any mounts in this filesystem. @@ -1929,7 +1949,7 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) #if CONFIG_TRIGGERS vfs_nested_trigger_unmounts(mp, flags, ctx); did_vflush = 1; -#endif +#endif if (forcedunmount) lflags |= FORCECLOSE; error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags); @@ -2016,7 +2036,7 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) out: if (mp->mnt_lflag & MNT_LWAIT) { mp->mnt_lflag &= ~MNT_LWAIT; - needwakeup = 1; + needwakeup = 1; } #if CONFIG_TRIGGERS @@ -2026,9 +2046,9 @@ out: OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag); } - /* + /* * Callback and context are set together under the mount lock, and - * never cleared, so we're safe to examine them here, drop the lock, + * never cleared, so we're safe to examine them here, drop the lock, * and call out. */ if (mp->mnt_triggercallback != NULL) { @@ -2041,7 +2061,7 @@ out: } else { mount_unlock(mp); } -#else +#else mount_unlock(mp); #endif /* CONFIG_TRIGGERS */ @@ -2129,7 +2149,7 @@ dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx) /* * Fill the array with submount fsids. * Since mounts are always added to the tail of the mount list, the - * list is always in mount order. + * list is always in mount order. * For each mount check if the mounted-on vnode belongs to a * mount that's already added to our array of mounts to be unmounted. */ @@ -2172,7 +2192,7 @@ mount_dropcrossref(mount_t mp, vnode_t dp, int need_put) panic("mount cross refs -ve"); if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) { - + if (need_put) vnode_put_locked(dp); vnode_unlock(dp); @@ -2200,7 +2220,7 @@ int syncprt = 0; int print_vmpage_stat=0; int sync_timeout = 60; // Sync time limit (sec) -static int +static int sync_callback(mount_t mp, __unused void *arg) { if ((mp->mnt_flag & MNT_RDONLY) == 0) { @@ -2413,18 +2433,24 @@ statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval) vfs_context_t ctx = vfs_context_current(); vnode_t vp; - NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); - if (error) + if (error != 0) return (error); vp = nd.ni_vp; mp = vp->v_mount; sp = &mp->mnt_vfsstat; nameidone(&nd); +#if CONFIG_MACF + error = mac_mount_check_stat(ctx, mp); + if (error != 0) + return (error); +#endif + error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT); - if (error != 0) { + if (error != 0) { vnode_put(vp); return (error); } @@ -2464,8 +2490,15 @@ fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval) error = EBADF; goto out; } + +#if CONFIG_MACF + error = mac_mount_check_stat(vfs_context_current(), mp); + if (error != 0) + goto out; +#endif + sp = &mp->mnt_vfsstat; - if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) { + if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) { goto out; } @@ -2478,15 +2511,15 @@ out: return (error); } -/* - * Common routine to handle copying of statfs64 data to user space +/* + * Common routine to handle copying of statfs64 data to user space */ -static int +static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp) { int error; struct statfs64 sfs; - + bzero(&sfs, sizeof(sfs)); sfs.f_bsize = sfsp->f_bsize; @@ -2514,8 +2547,8 @@ statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp) return(error); } -/* - * Get file system statistics in 64-bit mode +/* + * Get file system statistics in 64-bit mode */ int statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval) @@ -2527,18 +2560,24 @@ statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *r vfs_context_t ctxp = vfs_context_current(); vnode_t vp; - NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctxp); error = namei(&nd); - if (error) + if (error != 0) return (error); vp = nd.ni_vp; mp = vp->v_mount; sp = &mp->mnt_vfsstat; nameidone(&nd); +#if CONFIG_MACF + error = mac_mount_check_stat(ctxp, mp); + if (error != 0) + return (error); +#endif + error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT); - if (error != 0) { + if (error != 0) { vnode_put(vp); return (error); } @@ -2549,8 +2588,8 @@ statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *r return (error); } -/* - * Get file system statistics in 64-bit mode +/* + * Get file system statistics in 64-bit mode */ int fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval) @@ -2578,6 +2617,13 @@ fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t error = EBADF; goto out; } + +#if CONFIG_MACF + error = mac_mount_check_stat(vfs_context_current(), mp); + if (error != 0) + goto out; +#endif + sp = &mp->mnt_vfsstat; if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) { goto out; @@ -2605,13 +2651,20 @@ struct getfsstat_struct { static int getfsstat_callback(mount_t mp, void * arg) { - + struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg; struct vfsstatfs *sp; int error, my_size; vfs_context_t ctx = vfs_context_current(); if (fstp->sfsp && fstp->count < fstp->maxcount) { +#if CONFIG_MACF + error = mac_mount_check_stat(ctx, mp); + if (error != 0) { + fstp->error = error; + return(VFS_RETURNED_DONE); + } +#endif sp = &mp->mnt_vfsstat; /* * If MNT_NOWAIT is specified, do not refresh the @@ -2671,14 +2724,14 @@ getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval) * * Parameters: p (ignored) * uap User argument descriptor (see below) - * retval Count of file system statistics (N stats) + * retval Count of file system statistics (N stats) * * Indirect: uap->bufsize Buffer size * uap->macsize MAC info size * uap->buf Buffer where information will be returned * uap->mac MAC info * uap->flags File system flags - * + * * * Returns: 0 Success * !0 Not success @@ -2753,7 +2806,7 @@ __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval fst.error = 0; fst.maxcount = maxcount; - + vfs_iterate(0, getfsstat_callback, &fst); if (mp) @@ -2779,6 +2832,13 @@ getfsstat64_callback(mount_t mp, void * arg) int error; if (fstp->sfsp && fstp->count < fstp->maxcount) { +#if CONFIG_MACF + error = mac_mount_check_stat(vfs_context_current(), mp); + if (error != 0) { + fstp->error = error; + return(VFS_RETURNED_DONE); + } +#endif sp = &mp->mnt_vfsstat; /* * If MNT_NOWAIT is specified, do not refresh the fsstat @@ -2855,7 +2915,7 @@ getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval) * by this call needs a vnode_put * */ -static int +int vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp) { int error; @@ -3067,7 +3127,7 @@ common_chdir(proc_t p, struct chdir_args *uap, int per_thread) vnode_t tvp; vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = change_dir(&nd, ctx); if (error) @@ -3169,7 +3229,7 @@ chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval) if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); - NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = change_dir(&nd, ctx); if (error) @@ -3396,6 +3456,81 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, if (flags & O_CLOFORK) *fdflags(p, indx) |= UF_FORKCLOSE; procfdtbl_releasefd(p, indx, NULL); + +#if CONFIG_SECLUDED_MEMORY + if (secluded_for_filecache && + FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE && + vnode_vtype(vp) == VREG) { + memory_object_control_t moc; + + moc = ubc_getobject(vp, UBC_FLAGS_NONE); + + if (moc == MEMORY_OBJECT_CONTROL_NULL) { + /* nothing to do... */ + } else if (fp->f_fglob->fg_flag & FWRITE) { + /* writable -> no longer eligible for secluded pages */ + memory_object_mark_eligible_for_secluded(moc, + FALSE); + } else if (secluded_for_filecache == 1) { + char pathname[32] = { 0, }; + size_t copied; + /* XXX FBDP: better way to detect /Applications/ ? */ + if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) { + copyinstr(ndp->ni_dirp, + pathname, + sizeof (pathname), + &copied); + } else { + copystr(CAST_DOWN(void *, ndp->ni_dirp), + pathname, + sizeof (pathname), + &copied); + } + pathname[sizeof (pathname) - 1] = '\0'; + if (strncmp(pathname, + "/Applications/", + strlen("/Applications/")) == 0 && + strncmp(pathname, + "/Applications/Camera.app/", + strlen("/Applications/Camera.app/")) != 0) { + /* + * not writable + * AND from "/Applications/" + * AND not from "/Applications/Camera.app/" + * ==> eligible for secluded + */ + memory_object_mark_eligible_for_secluded(moc, + TRUE); + } + } else if (secluded_for_filecache == 2) { +/* not implemented... */ + if (!strncmp(vp->v_name, + DYLD_SHARED_CACHE_NAME, + strlen(DYLD_SHARED_CACHE_NAME)) || + !strncmp(vp->v_name, + "dyld", + strlen(vp->v_name)) || + !strncmp(vp->v_name, + "launchd", + strlen(vp->v_name)) || + !strncmp(vp->v_name, + "Camera", + strlen(vp->v_name)) || + !strncmp(vp->v_name, + "mediaserverd", + strlen(vp->v_name))) { + /* + * This file matters when launching Camera: + * do not store its contents in the secluded + * pool that will be drained on Camera launch. + */ + memory_object_mark_eligible_for_secluded(moc, + FALSE); + } + } + } +#endif /* CONFIG_SECLUDED_MEMORY */ + fp_drop(p, indx, fp, 1); proc_fdunlock(p); @@ -3405,14 +3540,14 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, bad: context = *vfs_context_current(); context.vc_ucred = fp->f_fglob->fg_cred; - + if ((fp->f_fglob->fg_flag & FHASLOCK) && (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - + (void)VNOP_ADVLOCK( vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL); } @@ -3534,9 +3669,9 @@ open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval) return ciferror; } -/* +/* * Go through the data-protected atomically controlled open (2) - * + * * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode) */ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) { @@ -3544,7 +3679,7 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int class = uap->class; int dpflags = uap->dpflags; - /* + /* * Follow the same path as normal open(2) * Look up the item if it exists, and acquire the vnode. */ @@ -3553,7 +3688,7 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, struct nameidata nd; int cmode; int error; - + VATTR_INIT(&va); /* Mask off all but regular access permissions */ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; @@ -3562,13 +3697,13 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current()); - /* - * Initialize the extra fields in vnode_attr to pass down our + /* + * Initialize the extra fields in vnode_attr to pass down our * extra fields. * 1. target cprotect class. - * 2. set a flag to mark it as requiring open-raw-encrypted semantics. - */ - if (flags & O_CREAT) { + * 2. set a flag to mark it as requiring open-raw-encrypted semantics. + */ + if (flags & O_CREAT) { /* lower level kernel code validates that the class is valid before applying it. */ if (class != PROTECTION_CLASS_DEFAULT) { /* @@ -3578,12 +3713,12 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, VATTR_SET(&va, va_dataprotect_class, class); } } - + if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) { if ( flags & (O_RDWR | O_WRONLY)) { /* Not allowed to write raw encrypted bytes */ - return EINVAL; - } + return EINVAL; + } if (uap->dpflags & O_DP_GETRAWENCRYPTED) { VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); } @@ -3683,6 +3818,10 @@ openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval) int pathlen = 0; vfs_context_t ctx = vfs_context_current(); + if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) { + return (error); + } + if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) { return (error); } @@ -3754,7 +3893,7 @@ mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval) if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) return (error); - NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1, + NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -3841,7 +3980,7 @@ mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap) int error; struct nameidata nd; - NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1, + NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, upath, ctx); error = namei(&nd); if (error) @@ -3969,7 +4108,7 @@ safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *trunc len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1; if (len > MAXPATHLEN) { char *ptr; - + // the string got truncated! *truncated_path = 1; ptr = my_strrchr(path, '/'); @@ -3987,9 +4126,9 @@ safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *trunc if (ret != ENOSPC) { printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n", dvp, dvp->v_name ? dvp->v_name : "no-name", ret); - } + } *truncated_path = 1; - + do { if (mydvp->v_parent != NULL) { mydvp = mydvp->v_parent; @@ -4002,7 +4141,7 @@ safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *trunc len = 2; mydvp = NULL; } - + if (mydvp == NULL) { break; } @@ -4062,10 +4201,11 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2, * However, some file systems may have limited support. */ if (vp->v_type == VDIR) { - if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) { + if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) { error = EPERM; /* POSIX */ goto out; } + /* Linking to a directory requires ownership. */ if (!kauth_cred_issuser(vfs_context_ucred(ctx))) { struct vnode_attr dva; @@ -4113,7 +4253,7 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2, error = EXDEV; goto out2; } - + /* authorize creation of the target note */ if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) goto out2; @@ -4158,11 +4298,11 @@ linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2, link_name_len = MAXPATHLEN; if (vn_getpath(vp, link_to_path, &link_name_len) == 0) { /* - * Call out to allow 3rd party notification of rename. + * Call out to allow 3rd party notification of rename. * Ignore result of kauth_authorize_fileop call. */ - kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, - (uintptr_t)link_to_path, + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, + (uintptr_t)link_to_path, (uintptr_t)target_path); } if (link_to_path != NULL) { @@ -4241,7 +4381,6 @@ symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd, int error; struct nameidata nd; vnode_t vp, dvp; - uint32_t dfflags; // Directory file flags size_t dummy=0; proc_t p; @@ -4270,15 +4409,6 @@ symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd, VATTR_SET(&va, va_type, VLNK); VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask); - /* - * Handle inheritance of restricted flag - */ - error = vnode_flags(dvp, &dfflags, ctx); - if (error) - goto skipit; - if (dfflags & SF_RESTRICTED) - VATTR_SET(&va, va_flags, SF_RESTRICTED); - #if CONFIG_MACF error = mac_vnode_check_create(ctx, dvp, &nd.ni_cnd, &va); @@ -4476,7 +4606,7 @@ lookup_continue: if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) { flags |= VNODE_REMOVE_NODELETEBUSY; } - + /* Skip any potential upcalls if told to. */ if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) { flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT; @@ -4580,13 +4710,13 @@ lookup_continue: } /* - * Call out to allow 3rd party notification of delete. + * Call out to allow 3rd party notification of delete. * Ignore result of kauth_authorize_fileop call. */ if (!error) { if (has_listeners) { - kauth_authorize_fileop(vfs_context_ucred(ctx), - KAUTH_FILEOP_DELETE, + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_DELETE, (uintptr_t)vp, (uintptr_t)path); } @@ -4625,14 +4755,14 @@ out: RELEASE_PATH(path); #if NAMEDRSRCFORK - /* recycle the deleted rsrc fork vnode to force a reclaim, which + /* recycle the deleted rsrc fork vnode to force a reclaim, which * will cause its shadow file to go away if necessary. */ if (vp && (vnode_isnamedstream(vp)) && (vp->v_parent != NULLVP) && vnode_isshadow(vp)) { vnode_recycle(vp); - } + } #endif /* * nameidone has to happen before we vnode_put(dvp) @@ -4769,7 +4899,7 @@ lseek(proc_t p, struct lseek_args *uap, off_t *retval) } } - /* + /* * An lseek can affect whether data is "available to read." Use * hint of NOTE_NONE so no EVFILT_VNODE events fire */ @@ -4820,7 +4950,7 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) /* take advantage of definition of uflags */ action = uflags >> 8; } - + #if CONFIG_MACF error = mac_vnode_check_access(ctx, vp, uflags); if (error) @@ -4843,8 +4973,8 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) * access_extended: Check access permissions in bulk. * * Description: uap->entries Pointer to an array of accessx - * descriptor structs, plus one or - * more NULL terminated strings (see + * descriptor structs, plus one or + * more NULL terminated strings (see * "Notes" section below). * uap->size Size of the area pointed to by * uap->entries. @@ -4885,7 +5015,7 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) * * since we must have at least one string, and the string must * be at least one character plus the NULL terminator in length. - * + * * XXX: Need to support the check-as uid argument */ int @@ -4977,6 +5107,12 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused in goto out; } + /* Also do not let ad_name_offset point to something beyond the size of the input */ + if (input[i].ad_name_offset >= uap->size) { + error = EINVAL; + goto out; + } + /* * An offset of 0 means use the previous descriptor's offset; * this is used to chain multiple requests for the same file @@ -5038,7 +5174,7 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused in vnode_put(dvp); dvp = NULL; } - + /* * Scan forward in the descriptor list to see if we * need the parent vnode. We will need it if we are @@ -5050,7 +5186,7 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused in for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) if (input[j].ad_flags & _DELETE_OK) wantdelete = 1; - + niopts = FOLLOW | AUDITVNPATH1; /* need parent for vnode_authorize for deletion test */ @@ -5095,7 +5231,7 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused in /* copy out results */ error = copyout(result, uap->results, desc_actual * sizeof(errno_t)); - + out: if (input && input != stack_input) FREE(input, M_TEMP); @@ -5164,7 +5300,7 @@ faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode, goto out; #if NAMEDRSRCFORK - /* Grab reference on the shadow stream file vnode to + /* Grab reference on the shadow stream file vnode to * force an inactive on release which will mark it * for recycle. */ @@ -5188,7 +5324,7 @@ faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode, if (amode & _DELETE_OK) vnode_put(nd.ni_dvp); nameidone(&nd); - + out: if (!(flag & AT_EACCESS)) kauth_cred_unref(&context.vc_ucred); @@ -5260,8 +5396,8 @@ fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub, statptr = (void *)&source; #if NAMEDRSRCFORK - /* Grab reference on the shadow stream file vnode to - * force an inactive on release which will mark it + /* Grab reference on the shadow stream file vnode to + * force an inactive on release which will mark it * for recycle. */ if (vnode_isnamedstream(nd.ni_vp) && @@ -5290,11 +5426,11 @@ fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub, source.sb64.st_qspare[0] = 0LL; source.sb64.st_qspare[1] = 0LL; if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { - munge_user64_stat64(&source.sb64, &dest.user64_sb64); + munge_user64_stat64(&source.sb64, &dest.user64_sb64); my_size = sizeof(dest.user64_sb64); sbp = (caddr_t)&dest.user64_sb64; } else { - munge_user32_stat64(&source.sb64, &dest.user32_sb64); + munge_user32_stat64(&source.sb64, &dest.user32_sb64); my_size = sizeof(dest.user32_sb64); sbp = (caddr_t)&dest.user32_sb64; } @@ -5309,11 +5445,11 @@ fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub, source.sb.st_qspare[0] = 0LL; source.sb.st_qspare[1] = 0LL; if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { - munge_user64_stat(&source.sb, &dest.user64_sb); + munge_user64_stat(&source.sb, &dest.user64_sb); my_size = sizeof(dest.user64_sb); sbp = (caddr_t)&dest.user64_sb; } else { - munge_user32_stat(&source.sb, &dest.user32_sb); + munge_user32_stat(&source.sb, &dest.user32_sb); my_size = sizeof(dest.user32_sb); sbp = (caddr_t)&dest.user32_sb; } @@ -5363,13 +5499,13 @@ out: * * Parameters: p (ignored) * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->path Path of file to get status from * uap->ub User buffer (holds file status info) * uap->xsecurity ACL to get (extended security) * uap->xsecurity_size Size of ACL - * + * * Returns: 0 Success * !0 errno value * @@ -5406,13 +5542,13 @@ stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval) * * Parameters: p (ignored) * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->path Path of file to get status from * uap->ub User buffer (holds file status info) * uap->xsecurity ACL to get (extended security) * uap->xsecurity_size Size of ACL - * + * * Returns: 0 Success * !0 errno value * @@ -5430,13 +5566,13 @@ stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused in * * Parameters: p (ignored) * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->path Path of file to get status from * uap->ub User buffer (holds file status info) * uap->xsecurity ACL to get (extended security) * uap->xsecurity_size Size of ACL - * + * * Returns: 0 Success * !0 errno value * @@ -5472,13 +5608,13 @@ lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval) * * Parameters: p (ignored) * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->path Path of file to get status from * uap->ub User buffer (holds file status info) * uap->xsecurity ACL to get (extended security) * uap->xsecurity_size Size of ACL - * + * * Returns: 0 Success * !0 errno value * @@ -5535,7 +5671,7 @@ pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval) struct nameidata nd; vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -5646,6 +5782,11 @@ chflags1(vnode_t vp, int flags, vfs_context_t ctx) goto out; error = vnode_setattr(vp, &va, ctx); +#if CONFIG_MACF + if (error == 0) + mac_vnode_notify_setflags(ctx, vp, flags); +#endif + if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) { error = ENOTSUP; } @@ -5667,7 +5808,7 @@ chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval) struct nameidata nd; AUDIT_ARG(fflags, uap->flags); - NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -5725,7 +5866,7 @@ chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) { kauth_action_t action; int error; - + AUDIT_ARG(mode, vap->va_mode); /* XXX audit new args */ @@ -5740,6 +5881,17 @@ chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) if (VATTR_IS_ACTIVE(vap, va_mode) && (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0) return (error); + + if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) { + if ((error = mac_vnode_check_setowner(ctx, vp, + VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1, + VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1))) + return (error); + } + + if (VATTR_IS_ACTIVE(vap, va_acl) && + (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl))) + return (error); #endif /* make sure that the caller is allowed to set this security information */ @@ -5749,8 +5901,22 @@ chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) error = EPERM; return(error); } - - error = vnode_setattr(vp, vap, ctx); + + if ((error = vnode_setattr(vp, vap, ctx)) != 0) + return (error); + +#if CONFIG_MACF + if (VATTR_IS_ACTIVE(vap, va_mode)) + mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode); + + if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) + mac_vnode_notify_setowner(ctx, vp, + VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1, + VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1); + + if (VATTR_IS_ACTIVE(vap, va_acl)) + mac_vnode_notify_setacl(ctx, vp, vap->va_acl); +#endif return (error); } @@ -5782,7 +5948,7 @@ chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, } /* - * chmod_extended: Change the mode of a file given a path name; with extended + * chmod_extended: Change the mode of a file given a path name; with extended * argument list (including extended security (ACL)). * * Parameters: p Process requesting the open @@ -5909,14 +6075,14 @@ fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap) * * Parameters: p Process requesting to change file mode * uap User argument descriptor (see below) - * retval (ignored) + * retval (ignored) * * Indirect: uap->mode File mode to set (same as 'chmod') * uap->uid UID to set * uap->gid GID to set * uap->xsecurity ACL to set (or delete) * uap->fd File descriptor of file to change mode - * + * * Returns: 0 Success * !0 errno value * @@ -5957,7 +6123,7 @@ fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *re error = fchmod1(p, uap->fd, &va); - + switch(uap->xsecurity) { case USER_ADDR_NULL: case CAST_USER_ADDR_T(-1): @@ -6026,7 +6192,12 @@ fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid, if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) goto out; error = vnode_setattr(vp, &va, ctx); - + +#if CONFIG_MACF + if (error == 0) + mac_vnode_notify_setowner(ctx, vp, uid, gid); +#endif + out: /* * EACCES is only allowed from namei(); permissions failure should @@ -6118,6 +6289,11 @@ fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval) } error = vnode_setattr(vp, &va, ctx); +#if CONFIG_MACF + if (error == 0) + mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid); +#endif + out: (void)vnode_put(vp); file_drop(uap->fd); @@ -6198,6 +6374,11 @@ setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, } error = vnode_setattr(vp, &va, ctx); +#if CONFIG_MACF + if (error == 0) + mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]); +#endif + out: return error; } @@ -6216,10 +6397,10 @@ utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval) vfs_context_t ctx = vfs_context_current(); /* - * AUDIT: Needed to change the order of operations to do the + * AUDIT: Needed to change the order of operations to do the * name lookup first because auditing wants the path. */ - NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) @@ -6286,7 +6467,7 @@ truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval) if (uap->length < 0) return(EINVAL); - NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); if ((error = namei(&nd))) return (error); @@ -6308,6 +6489,12 @@ truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval) if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) goto out; error = vnode_setattr(vp, &va, ctx); + +#if CONFIG_MACF + if (error == 0) + mac_vnode_notify_truncate(ctx, NOCRED, vp); +#endif + out: vnode_put(vp); return (error); @@ -6330,7 +6517,7 @@ ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval) AUDIT_ARG(fd, uap->fd); if (uap->length < 0) return(EINVAL); - + if ( (error = fp_lookup(p,fd,&fp,0)) ) { return(error); } @@ -6371,6 +6558,12 @@ ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval) VATTR_INIT(&va); VATTR_SET(&va, va_data_size, uap->length); error = vnode_setattr(vp, &va, ctx); + +#if CONFIG_MACF + if (error == 0) + mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp); +#endif + (void)vnode_put(vp); out: file_drop(fd); @@ -6397,7 +6590,7 @@ fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval) * thread cancellation points. */ /* ARGSUSED */ -int +int fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval) { return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT)); @@ -6465,7 +6658,7 @@ fsync_common(proc_t p, struct fsync_args *uap, int flags) #if NAMEDRSRCFORK /* Sync resource fork shadow file if necessary. */ if ((error == 0) && - (vp->v_flag & VISNAMEDSTREAM) && + (vp->v_flag & VISNAMEDSTREAM) && (vp->v_parent != NULLVP) && vnode_isshadow(vp) && (fp->f_flags & FP_WRITTEN)) { @@ -6479,7 +6672,7 @@ fsync_common(proc_t p, struct fsync_args *uap, int flags) } /* - * Duplicate files. Source must be a file, target must be a file or + * Duplicate files. Source must be a file, target must be a file or * must not exist. * * XXX Copyfile authorisation checking is woefully inadequate, and will not @@ -6493,6 +6686,10 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval) struct nameidata fromnd, tond; int error; vfs_context_t ctx = vfs_context_current(); +#if CONFIG_MACF + struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd; + struct vnode_attr va; +#endif /* Check that the flags are valid. */ @@ -6500,7 +6697,7 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval) return(EINVAL); } - NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1, + NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx); if ((error = namei(&fromnd))) return (error); @@ -6521,11 +6718,42 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval) goto out; } } + if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) { error = EISDIR; goto out; } + /* This calls existing MAC hooks for open */ + if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx, + NULL))) { + goto out; + } + + if (tvp) { + /* + * See unlinkat_internal for an explanation of the potential + * ENOENT from the MAC hook but the gist is that the MAC hook + * can fail because vn_getpath isn't able to return the full + * path. We choose to ignore this failure. + */ + error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL); + if (error && error != ENOENT) + goto out; + error = 0; + } + +#if CONFIG_MACF + VATTR_INIT(&va); + VATTR_SET(&va, va_type, fvp->v_type); + /* Mask off all but regular access permissions */ + VATTR_SET(&va, va_mode, + ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS)); + error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va); + if (error) + goto out; +#endif /* CONFIG_MACF */ + if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) goto out; @@ -6555,8 +6783,6 @@ out: out1: vnode_put(fvp); - if (fromnd.ni_startdir) - vnode_put(fromnd.ni_startdir); nameidone(&fromnd); if (error == -1) @@ -6564,43 +6790,329 @@ out1: return (error); } +#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1 /* - * Rename files. Source and destination must either both be directories, - * or both not be directories. If target is a directory, it must be empty. + * Helper function for doing clones. The caller is expected to provide an + * iocounted source vnode and release it. */ -/* ARGSUSED */ static int -renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from, - int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags) +clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd, + user_addr_t dst, uint32_t flags, vfs_context_t ctx) { vnode_t tvp, tdvp; - vnode_t fvp, fdvp; - struct nameidata *fromnd, *tond; + struct nameidata tond; int error; - int do_retry; - int retry_count; - int mntrename; - int need_event; - const char *oname = NULL; - char *from_name = NULL, *to_name = NULL; - int from_len=0, to_len=0; - int holding_mntlock; - mount_t locked_mp = NULL; - vnode_t oparent = NULLVP; -#if CONFIG_FSE - fse_info from_finfo, to_finfo; -#endif - int from_truncated=0, to_truncated; - int batched = 0; - struct vnode_attr *fvap, *tvap; - int continuing = 0; - /* carving out a chunk for structs that are too big to be on stack. */ - struct { - struct nameidata from_node, to_node; - struct vnode_attr fv_attr, tv_attr; - } * __rename_data; - MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK); + int follow; + boolean_t free_acl; + boolean_t attr_cleanup; + enum vtype v_type; + kauth_action_t action; + struct componentname *cnp; + uint32_t defaulted; + struct vnode_attr va; + + v_type = vnode_vtype(fvp); + switch (v_type) { + case VLNK: + /* FALLTHRU */ + case VREG: + action = KAUTH_VNODE_ADD_FILE; + break; + case VDIR: + if (vnode_isvroot(fvp) || vnode_ismount(fvp) || + fvp->v_mountedhere) { + return (EINVAL); + } + action = KAUTH_VNODE_ADD_SUBDIRECTORY; + break; + default: + return (EINVAL); + } + + AUDIT_ARG(fd2, dst_dirfd); + AUDIT_ARG(value32, flags); + + follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW; + NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2, + UIO_USERSPACE, dst, ctx); + if ((error = nameiat(&tond, dst_dirfd))) + return (error); + cnp = &tond.ni_cnd; + tdvp = tond.ni_dvp; + tvp = tond.ni_vp; + + free_acl = FALSE; + attr_cleanup = FALSE; + + if (tvp != NULL) { + error = EEXIST; + goto out; + } + + if (vnode_mount(tdvp) != vnode_mount(fvp)) { + error = EXDEV; + goto out; + } + +#if CONFIG_MACF + if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp))) + goto out; +#endif + if ((error = vnode_authorize(tdvp, NULL, action, ctx))) + goto out; + + action = KAUTH_VNODE_GENERIC_READ_BITS; + if (data_read_authorised) + action &= ~KAUTH_VNODE_READ_DATA; + if ((error = vnode_authorize(fvp, NULL, action, ctx))) + goto out; + + /* + * certain attributes may need to be changed from the source, we ask for + * those here. + */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_type); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_flags); + VATTR_WANTED(&va, va_acl); + + if ((error = vnode_getattr(fvp, &va, ctx)) != 0) + goto out; + + if (!VATTR_IS_SUPPORTED(&va, va_acl)) + VATTR_CLEAR_ACTIVE(&va, va_acl); + else if (va.va_acl != NULL) + free_acl = TRUE; + + if (!VATTR_IS_SUPPORTED(&va, va_mode)) { + VATTR_CLEAR_ACTIVE(&va, va_mode); + } else { + proc_t p = vfs_context_proc(ctx); + + VATTR_SET(&va, va_mode, + (va.va_mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + } + + if (!VATTR_IS_SUPPORTED(&va, va_flags)) { + VATTR_CLEAR_ACTIVE(&va, va_flags); + } else if (va.va_flags & SF_RESTRICTED) { + /* + * Turn off SF_RESTRICTED from source, if the destination needs + * it, it will be handled in vnode_authattr_new. + */ + VATTR_SET(&va, va_flags, (va.va_flags & ~SF_RESTRICTED)); + } + + /* Handle ACL inheritance, initialize vap. */ + if (v_type == VLNK) { + error = vnode_authattr_new(tdvp, &va, 0, ctx); + } else { + error = vn_attribute_prepare(tdvp, &va, &defaulted, ctx); + attr_cleanup = TRUE; + } + + if (error) { + attr_cleanup = FALSE; + goto out; + } + + error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &va, flags, ctx); + + if (!error && tvp) { + int update_flags = 0; +#if CONFIG_FSE + int fsevent; +#endif /* CONFIG_FSE */ + +#if CONFIG_MACF + (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp, + VNODE_LABEL_CREATE, ctx); +#endif + /* + * If some of the requested attributes weren't handled by the + * VNOP, use our fallback code. + */ + if (!VATTR_ALL_SUPPORTED(&va)) + (void)vnode_setattr_fallback(tvp, &va, ctx); + + // Make sure the name & parent pointers are hooked up + if (tvp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (tvp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) { + (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr, + cnp->cn_namelen, cnp->cn_hash, update_flags); + } + +#if CONFIG_FSE + switch (vnode_vtype(tvp)) { + case VLNK: + /* FALLTHRU */ + case VREG: + fsevent = FSE_CREATE_FILE; + break; + case VDIR: + fsevent = FSE_CREATE_DIR; + break; + default: + goto out; + } + + if (need_fsevent(fsevent, tvp)) { + add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp, + FSE_ARG_DONE); + } +#endif /* CONFIG_FSE */ + } +#if CLONE_SNAPSHOT_FALLBACKS_ENABLED + else if (error == ENOTSUP) { + struct vfs_attr vfa; + + /* + * Fallback to VNOP_COPYFILE but check first that the + * filesystem supports cloning. + */ + VFSATTR_INIT(&vfa); + VFSATTR_WANTED(&vfa, f_capabilities); + if ((vfs_getattr(vnode_mount(tdvp), &vfa, ctx) == 0) && + VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) && + (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_CLONE) && + (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_CLONE)) { + + error = VNOP_COPYFILE(fvp, tdvp, tvp, cnp, 0, + 0, ctx); + } + } +#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */ + +out: + if (attr_cleanup) + vn_attribute_cleanup(&va, defaulted); + if (free_acl && va.va_acl) + kauth_acl_free(va.va_acl); + nameidone(&tond); + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + return (error); +} + +/* + * clone files or directories, target must not exist. + */ +/* ARGSUSED */ +int +clonefileat(__unused proc_t p, struct clonefileat_args *uap, + __unused int32_t *retval) +{ + vnode_t fvp; + struct nameidata fromnd; + int follow; + int error; + vfs_context_t ctx = vfs_context_current(); + + /* Check that the flags are valid. */ + if (uap->flags & ~CLONE_NOFOLLOW) + return (EINVAL); + + AUDIT_ARG(fd, uap->src_dirfd); + + follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW; + NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1, + UIO_USERSPACE, uap->src, ctx); + if ((error = nameiat(&fromnd, uap->src_dirfd))) + return (error); + + fvp = fromnd.ni_vp; + nameidone(&fromnd); + + error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst, + uap->flags, ctx); + + vnode_put(fvp); + return (error); +} + +int +fclonefileat(__unused proc_t p, struct fclonefileat_args *uap, + __unused int32_t *retval) +{ + vnode_t fvp; + struct fileproc *fp; + int error; + vfs_context_t ctx = vfs_context_current(); + + AUDIT_ARG(fd, uap->src_fd); + error = fp_getfvp(p, uap->src_fd, &fp, &fvp); + if (error) + return (error); + + if ((fp->f_fglob->fg_flag & FREAD) == 0) { + AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1); + error = EBADF; + goto out; + } + + if ((error = vnode_getwithref(fvp))) + goto out; + + AUDIT_ARG(vnpath, fvp, ARG_VNODE1); + + error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst, + uap->flags, ctx); + + vnode_put(fvp); +out: + file_drop(uap->src_fd); + return (error); +} + +/* + * Rename files. Source and destination must either both be directories, + * or both not be directories. If target is a directory, it must be empty. + */ +/* ARGSUSED */ +static int +renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from, + int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags) +{ + if (flags & ~VFS_RENAME_FLAGS_MASK) + return EINVAL; + + if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) + return EINVAL; + + vnode_t tvp, tdvp; + vnode_t fvp, fdvp; + struct nameidata *fromnd, *tond; + int error; + int do_retry; + int retry_count; + int mntrename; + int need_event; + const char *oname = NULL; + char *from_name = NULL, *to_name = NULL; + int from_len=0, to_len=0; + int holding_mntlock; + mount_t locked_mp = NULL; + vnode_t oparent = NULLVP; +#if CONFIG_FSE + fse_info from_finfo, to_finfo; +#endif + int from_truncated=0, to_truncated; + int batched = 0; + struct vnode_attr *fvap, *tvap; + int continuing = 0; + /* carving out a chunk for structs that are too big to be on stack. */ + struct { + struct nameidata from_node, to_node; + struct vnode_attr fv_attr, tv_attr; + } * __rename_data; + MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK); fromnd = &__rename_data->from_node; tond = &__rename_data->to_node; @@ -6645,6 +7157,16 @@ continue_lookup: tvp = tond->ni_vp; } + if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) { + error = ENOENT; + goto out1; + } + + if (tvp && ISSET(flags, VFS_RENAME_EXCL)) { + error = EEXIST; + goto out1; + } + batched = vnode_compound_rename_available(fdvp); if (!fvp) { /* @@ -6665,7 +7187,7 @@ continue_lookup: } if (!batched) { - error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL); + error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL); if (error) { if (error == ENOENT) { assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES); @@ -6712,6 +7234,12 @@ continue_lookup: * * XXX Handle this in VFS after a continued lookup (if we missed * in the cache to start off) + * + * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so + * we'll skip past here. The file system is responsible for + * checking that @tvp is not a descendent of @fvp and vice versa + * so it should always return EINVAL if either @tvp or @fvp is the + * root of a volume. */ if ((fvp->v_flag & VROOT) && (fvp->v_type == VDIR) && @@ -6900,16 +7428,9 @@ skipped_lookup: to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated); } -#if CONFIG_SECLUDED_RENAME - if (flags & VFS_SECLUDE_RENAME) { - fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME; - } -#else - #pragma unused(flags) -#endif error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap, tdvp, &tvp, &tond->ni_cnd, tvap, - 0, ctx); + flags, ctx); if (holding_mntlock) { /* @@ -6967,6 +7488,11 @@ skipped_lookup: kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_RENAME, (uintptr_t)from_name, (uintptr_t)to_name); + if (flags & VFS_RENAME_SWAP) { + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_RENAME, + (uintptr_t)to_name, (uintptr_t)from_name); + } #if CONFIG_FSE if (from_name != NULL && to_name != NULL) { @@ -6982,13 +7508,27 @@ skipped_lookup: vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap); } - if (tvp) { - add_fsevent(FSE_RENAME, ctx, - FSE_ARG_STRING, from_len, from_name, - FSE_ARG_FINFO, &from_finfo, - FSE_ARG_STRING, to_len, to_name, - FSE_ARG_FINFO, &to_finfo, - FSE_ARG_DONE); + if (tvp) { + add_fsevent(FSE_RENAME, ctx, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_FINFO, &to_finfo, + FSE_ARG_DONE); + if (flags & VFS_RENAME_SWAP) { + /* + * Strictly speaking, swap is the equivalent of + * *three* renames. FSEvents clients should only take + * the events as a hint, so we only bother reporting + * two. + */ + add_fsevent(FSE_RENAME, ctx, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_FINFO, &to_finfo, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_DONE); + } } else { add_fsevent(FSE_RENAME, ctx, FSE_ARG_STRING, from_len, from_name, @@ -7117,17 +7657,15 @@ rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval) AT_FDCWD, uap->to, UIO_USERSPACE, 0)); } -#if CONFIG_SECLUDED_RENAME -int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval) +int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval) { return renameat_internal( - vfs_context_current(), - AT_FDCWD, uap->from, - AT_FDCWD, uap->to, + vfs_context_current(), + uap->fromfd, uap->from, + uap->tofd, uap->to, UIO_USERSPACE, uap->flags); } -#endif - + int renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval) { @@ -7556,7 +8094,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, int *numdirent, vfs_context_t ctxp) { /* Check if fs natively supports VNODE_READDIR_EXTENDED */ - if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) && + if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) && ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) { return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp); } else { @@ -7577,9 +8115,9 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, * will prevent us from reading more than we can pack. * * Since this buffer is wired memory, we will limit the - * buffer size to a maximum of 32K. We would really like to + * buffer size to a maximum of 32K. We would really like to * use 32K in the MIN(), but we use magic number 87371 to - * prevent uio_resid() * 3 / 8 from overflowing. + * prevent uio_resid() * 3 / 8 from overflowing. */ bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8; MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK); @@ -7730,7 +8268,7 @@ unionread: if (offset) { *offset = loff; } - + *bytesread = bufsize - uio_resid(auio); out: file_drop(fd); @@ -7807,7 +8345,7 @@ umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval) * * Indirect: uap->newmask umask to set * uap->xsecurity ACL to set - * + * * Returns: 0 Success * !0 Not success * @@ -7916,14 +8454,14 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval uint32_t newstate; int error, eofflag; uint32_t loff; - struct attrlist attributelist; + struct attrlist attributelist; vfs_context_t ctx = vfs_context_current(); int fd = uap->fd; char uio_buf[ UIO_SIZEOF(1) ]; kauth_action_t action; AUDIT_ARG(fd, fd); - + /* Get the attributes into kernel space */ if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) { return(error); @@ -7974,7 +8512,7 @@ unionread: loff = fp->f_fglob->fg_offset; auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->buffer, uap->buffersize); - + /* * If the only item requested is file names, we can let that past with * just LIST_DIRECTORY. If they want any other attributes, that means @@ -7984,7 +8522,7 @@ unionread: if ((attributelist.commonattr & ~ATTR_CMN_NAME) || attributelist.fileattr || attributelist.dirattr) action |= KAUTH_VNODE_SEARCH; - + if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) { /* Believe it or not, uap->options only has 32-bits of valid @@ -8026,7 +8564,7 @@ unionread: (void)vnode_put(vp); - if (error) + if (error) goto out; fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */ @@ -8067,7 +8605,7 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t #if CONFIG_FSE fse_info f_finfo, s_finfo; #endif - + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; @@ -8081,7 +8619,7 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t nameidone(&fnd); fvp = fnd.ni_vp; - NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2, + NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2, UIO_USERSPACE, uap->path2, ctx); error = namei(&snd); @@ -8098,7 +8636,7 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t if (svp == fvp) { error = EINVAL; goto out; - } + } /* * if the files are on different volumes, return an error @@ -8126,7 +8664,7 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t if ( #if CONFIG_FSE - need_fsevent(FSE_EXCHANGE, fvp) || + need_fsevent(FSE_EXCHANGE, fvp) || #endif kauth_authorize_fileop_has_listeners()) { GET_PATH(fpath); @@ -8138,7 +8676,7 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated); slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated); - + #if CONFIG_FSE get_fse_info(fvp, &f_finfo, ctx); get_fse_info(svp, &s_finfo, ctx); @@ -8155,10 +8693,10 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t const char *tmpname; if (fpath != NULL && spath != NULL) { - /* call out to allow 3rd party notification of exchangedata. + /* call out to allow 3rd party notification of exchangedata. * Ignore result of kauth_authorize_fileop call. */ - kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE, + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE, (uintptr_t)fpath, (uintptr_t)spath); } name_cache_lock(); @@ -8166,7 +8704,7 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t tmpname = fvp->v_name; fvp->v_name = svp->v_name; svp->v_name = tmpname; - + if (fvp->v_parent != svp->v_parent) { vnode_t tmp; @@ -8207,7 +8745,7 @@ uint32_t freespace_mb(vnode_t vp); uint32_t freespace_mb(vnode_t vp) { - vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT); + vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT); return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail * vp->v_mount->mnt_vfsstat.f_bsize) >> 20); } @@ -8251,7 +8789,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer); searchblock.returnbuffersize = tmp_searchblock.returnbuffersize; searchblock.maxmatches = tmp_searchblock.maxmatches; - /* + /* * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary * from a 32 bit long, and tv_usec is already a signed 32 bit int. */ @@ -8266,12 +8804,12 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) if (error) return(error); - /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. + /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. */ - if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || + if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) return(EINVAL); - + /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */ /* It all has to do into local memory and it's not that big so we might as well put it all together. */ /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/ @@ -8280,7 +8818,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */ /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */ /* assumes the size is still 556 bytes it will continue to work */ - + mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 + sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t)); @@ -8302,7 +8840,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) goto freeandexit; - + if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) goto freeandexit; @@ -8313,25 +8851,25 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) */ if (uap->options & SRCHFS_START) state->ss_union_layer = 0; - else + else uap->options |= state->ss_union_flags; state->ss_union_flags = 0; /* * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter, * which is passed in with an attrreference_t, we need to inspect the buffer manually here. - * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 - * and searchparams2. To obviate the need for all searchfs-supporting filesystems to + * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 + * and searchparams2. To obviate the need for all searchfs-supporting filesystems to * validate the user-supplied data offset of the attrreference_t, we'll do it here. */ if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) { attrreference_t* string_ref; u_int32_t* start_length; - user64_size_t param_length; + user64_size_t param_length; /* validate searchparams1 */ - param_length = searchblock.sizeofsearchparams1; + param_length = searchblock.sizeofsearchparams1; /* skip the word that specifies length of the buffer */ start_length= (u_int32_t*) searchparams1; start_length= start_length+1; @@ -8340,13 +8878,13 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) /* ensure no negative offsets or too big offsets */ if (string_ref->attr_dataoffset < 0 ) { error = EINVAL; - goto freeandexit; + goto freeandexit; } if (string_ref->attr_length > MAXPATHLEN) { error = EINVAL; goto freeandexit; } - + /* Check for pointer overflow in the string ref */ if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) { error = EINVAL; @@ -8415,9 +8953,9 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) } #endif - + /* - * If searchblock.maxmatches == 0, then skip the search. This has happened + * If searchblock.maxmatches == 0, then skip the search. This has happened * before and sometimes the underlying code doesnt deal with it well. */ if (searchblock.maxmatches == 0) { @@ -8427,7 +8965,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) /* * Allright, we have everything we need, so lets make that call. - * + * * We keep special track of the return value from the file system: * EAGAIN is an acceptable error condition that shouldn't keep us * from copying out any results... @@ -8446,7 +8984,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) auio, (struct searchstate *) &state->ss_fsstate, ctx); - + /* * If it's a union mount we need to be called again * to search the mounted-on filesystem. @@ -8469,7 +9007,7 @@ saveandexit: if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) goto freeandexit; - + error = fserror; freeandexit: @@ -8638,7 +9176,7 @@ void nspace_proc_exit(struct proc *p) { int i, event_mask = 0; - + for (i = 0; i < NSPACE_HANDLER_COUNT; i++) { if (p == nspace_handlers[i].handler_proc) { event_mask |= nspace_item_flags_for_type(i); @@ -8650,16 +9188,16 @@ nspace_proc_exit(struct proc *p) if (event_mask == 0) { return; } - + + lck_mtx_lock(&nspace_handler_lock); if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) { // if this process was the snapshot handler, zero snapshot_timeout snapshot_timestamp = 0; } - + // // unblock anyone that's waiting for the handler that died // - lck_mtx_lock(&nspace_handler_lock); for(i=0; i < MAX_NSPACE_ITEMS; i++) { if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) { @@ -8674,24 +9212,24 @@ nspace_proc_exit(struct proc *p) nspace_items[i].vid = 0; nspace_items[i].flags = NSPACE_ITEM_DONE; nspace_items[i].token = 0; - + wakeup((caddr_t)&(nspace_items[i].vp)); } } } - + wakeup((caddr_t)&nspace_item_idx); lck_mtx_unlock(&nspace_handler_lock); } -int +int resolve_nspace_item(struct vnode *vp, uint64_t op) { return resolve_nspace_item_ext(vp, op, NULL); } -int +int resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg) { int i, error, keep_waiting; @@ -8749,7 +9287,7 @@ retry: } else { nspace_items[i].refcount++; } - + if (i >= MAX_NSPACE_ITEMS) { ts.tv_sec = nspace_handler_timeout; ts.tv_nsec = 0; @@ -8786,7 +9324,7 @@ retry: nspace_items[i].token = 0; nspace_items[i].refcount = 1; - + wakeup((caddr_t)&nspace_item_idx); } @@ -8815,7 +9353,7 @@ retry: // hmmm, why did we get woken up? printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n", nspace_items[i].token); - } + } if (--nspace_items[i].refcount == 0) { nspace_items[i].vp = NULL; // clear this so that no one will match on it again @@ -8832,17 +9370,49 @@ retry: return error; } - -int -get_nspace_item_status(struct vnode *vp, int32_t *status) +int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg) { - int i; + int snapshot_error = 0; - lck_mtx_lock(&nspace_handler_lock); - for(i=0; i < MAX_NSPACE_ITEMS; i++) { - if (nspace_items[i].vp == vp) { - break; - } + if (vp == NULL) { + return 0; + } + + /* Swap files are special; skip them */ + if (vnode_isswap(vp)) { + return 0; + } + + if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) { + // the change time is within this epoch + int error; + + error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg); + if (error == EDEADLK) { + snapshot_error = 0; + } else if (error) { + if (error == EAGAIN) { + printf("nspace_snapshot_event: timed out waiting for namespace handler...\n"); + } else if (error == EINTR) { + // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n"); + snapshot_error = EINTR; + } + } + } + + return snapshot_error; +} + +int +get_nspace_item_status(struct vnode *vp, int32_t *status) +{ + int i; + + lck_mtx_lock(&nspace_handler_lock); + for(i=0; i < MAX_NSPACE_ITEMS; i++) { + if (nspace_items[i].vp == vp) { + break; + } } if (i >= MAX_NSPACE_ITEMS) { @@ -8854,7 +9424,7 @@ get_nspace_item_status(struct vnode *vp, int32_t *status) lck_mtx_unlock(&nspace_handler_lock); return 0; } - + #if 0 static int @@ -8922,7 +9492,7 @@ vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx) if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) return error; - + // // if the vnode is tagged VOPENEVT and the current process @@ -8945,13 +9515,13 @@ vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx) return error; } - /* Call out to allow 3rd party notification of open. + /* Call out to allow 3rd party notification of open. * Ignore result of kauth_authorize_fileop call. */ #if CONFIG_MACF mac_vnode_notify_open(ctx, vp, fmode); #endif - kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, (uintptr_t)vp, 0); @@ -8961,157 +9531,163 @@ vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx) static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type) { - int i, error=0, unblock=0; + int i; + int error = 0; + int unblock = 0; task_t curtask; - + lck_mtx_lock(&nspace_handler_exclusion_lock); if (nspace_handlers[nspace_type].handler_busy) { lck_mtx_unlock(&nspace_handler_exclusion_lock); return EBUSY; } + nspace_handlers[nspace_type].handler_busy = 1; lck_mtx_unlock(&nspace_handler_exclusion_lock); - - /* + + /* * Any process that gets here will be one of the namespace handlers. * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation * as we can cause deadlocks to occur, because the namespace handler may prevent - * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE + * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE * process. */ curtask = current_task(); - bsd_set_dependency_capable (curtask); - + bsd_set_dependency_capable (curtask); + lck_mtx_lock(&nspace_handler_lock); if (nspace_handlers[nspace_type].handler_proc == NULL) { nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread()); nspace_handlers[nspace_type].handler_proc = current_proc(); } - + + if (nspace_type == NSPACE_HANDLER_SNAPSHOT && + (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { + error = EINVAL; + } + while (error == 0) { - - for(i=0; i < MAX_NSPACE_ITEMS; i++) { + + /* Try to find matching namespace item */ + for (i = 0; i < MAX_NSPACE_ITEMS; i++) { if (nspace_items[i].flags & NSPACE_ITEM_NEW) { - if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) { - continue; + if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) { + break; } - break; } } - - if (i < MAX_NSPACE_ITEMS) { - nspace_items[i].flags &= ~NSPACE_ITEM_NEW; - nspace_items[i].flags |= NSPACE_ITEM_PROCESSING; - nspace_items[i].token = ++nspace_token_id; - - if (nspace_items[i].vp) { - struct fileproc *fp; - int32_t indx, fmode; - struct proc *p = current_proc(); - vfs_context_t ctx = vfs_context_current(); - struct vnode_attr va; - - - /* - * Use vnode pointer to acquire a file descriptor for - * hand-off to userland - */ - fmode = nspace_open_flags_for_type(nspace_type); - error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid); - if (error) { - unblock = 1; - break; - } - error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx); - if (error) { - unblock = 1; - vnode_put(nspace_items[i].vp); - break; - } - - if ((error = falloc(p, &fp, &indx, ctx))) { - vn_close(nspace_items[i].vp, fmode, ctx); - vnode_put(nspace_items[i].vp); - unblock = 1; - break; - } - - fp->f_fglob->fg_flag = fmode; - fp->f_fglob->fg_ops = &vnops; - fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp; - - proc_fdlock(p); - procfdtbl_releasefd(p, indx, NULL); - fp_drop(p, indx, fp, 1); - proc_fdunlock(p); - - /* - * All variants of the namespace handler struct support these three fields: - * token, flags, and the FD pointer - */ - error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t)); - error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t)); - error = copyout(&indx, nhd->fdptr, sizeof(uint32_t)); - - /* - * Handle optional fields: - * extended version support an info ptr (offset, length), and the - * - * namedata version supports a unique per-link object ID - * - */ - if (nhd->infoptr) { - uio_t uio = (uio_t)nspace_items[i].arg; - uint64_t u_offset, u_length; - - if (uio) { - u_offset = uio_offset(uio); - u_length = uio_resid(uio); - } else { - u_offset = 0; - u_length = 0; - } - error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t)); - error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t)); - } - - if (nhd->objid) { - VATTR_INIT(&va); - VATTR_WANTED(&va, va_linkid); - error = vnode_getattr(nspace_items[i].vp, &va, ctx); - if (error == 0 ) { - uint64_t linkid = 0; - if (VATTR_IS_SUPPORTED (&va, va_linkid)) { - linkid = (uint64_t)va.va_linkid; - } - error = copyout (&linkid, nhd->objid, sizeof(uint64_t)); - } - } - if (error) { - vn_close(nspace_items[i].vp, fmode, ctx); - fp_free(p, indx, fp); - unblock = 1; - } - - vnode_put(nspace_items[i].vp); - - break; - } else { - printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n", - i, nspace_items[i].vp, error, nspace_items[i].vp->v_name); - } - - } else { + if (i >= MAX_NSPACE_ITEMS) { + /* Nothing is there yet. Wait for wake up and retry */ error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0); if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { + /* Prevent infinite loop if snapshot handler exited */ error = EINVAL; break; } - + continue; + } + + nspace_items[i].flags &= ~NSPACE_ITEM_NEW; + nspace_items[i].flags |= NSPACE_ITEM_PROCESSING; + nspace_items[i].token = ++nspace_token_id; + + assert(nspace_items[i].vp); + struct fileproc *fp; + int32_t indx; + int32_t fmode; + struct proc *p = current_proc(); + vfs_context_t ctx = vfs_context_current(); + struct vnode_attr va; + bool vn_get_succsessful = false; + bool vn_open_successful = false; + bool fp_alloc_successful = false; + + /* + * Use vnode pointer to acquire a file descriptor for + * hand-off to userland + */ + fmode = nspace_open_flags_for_type(nspace_type); + error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid); + if (error) goto cleanup; + vn_get_succsessful = true; + + error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx); + if (error) goto cleanup; + vn_open_successful = true; + + error = falloc(p, &fp, &indx, ctx); + if (error) goto cleanup; + fp_alloc_successful = true; + + fp->f_fglob->fg_flag = fmode; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp; + + proc_fdlock(p); + procfdtbl_releasefd(p, indx, NULL); + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + + /* + * All variants of the namespace handler struct support these three fields: + * token, flags, and the FD pointer + */ + error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t)); + if (error) goto cleanup; + error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t)); + if (error) goto cleanup; + error = copyout(&indx, nhd->fdptr, sizeof(uint32_t)); + if (error) goto cleanup; + + /* + * Handle optional fields: + * extended version support an info ptr (offset, length), and the + * + * namedata version supports a unique per-link object ID + * + */ + if (nhd->infoptr) { + uio_t uio = (uio_t)nspace_items[i].arg; + uint64_t u_offset, u_length; + + if (uio) { + u_offset = uio_offset(uio); + u_length = uio_resid(uio); + } else { + u_offset = 0; + u_length = 0; + } + error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t)); + if (error) goto cleanup; + error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t)); + if (error) goto cleanup; } + + if (nhd->objid) { + VATTR_INIT(&va); + VATTR_WANTED(&va, va_linkid); + error = vnode_getattr(nspace_items[i].vp, &va, ctx); + if (error) goto cleanup; + + uint64_t linkid = 0; + if (VATTR_IS_SUPPORTED (&va, va_linkid)) { + linkid = (uint64_t)va.va_linkid; + } + error = copyout(&linkid, nhd->objid, sizeof(uint64_t)); + } +cleanup: + if (error) { + if (fp_alloc_successful) fp_free(p, indx, fp); + if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx); + unblock = 1; + } + + if (vn_get_succsessful) vnode_put(nspace_items[i].vp); + + break; } - + if (unblock) { if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) { vnode_lock_spin(nspace_items[i].vp); @@ -9122,34 +9698,34 @@ wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type) nspace_items[i].vid = 0; nspace_items[i].flags = NSPACE_ITEM_DONE; nspace_items[i].token = 0; - + wakeup((caddr_t)&(nspace_items[i].vp)); } - + if (nspace_type == NSPACE_HANDLER_SNAPSHOT) { // just go through every snapshot event and unblock it immediately. if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { - for(i=0; i < MAX_NSPACE_ITEMS; i++) { + for(i = 0; i < MAX_NSPACE_ITEMS; i++) { if (nspace_items[i].flags & NSPACE_ITEM_NEW) { if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) { nspace_items[i].vp = NULL; nspace_items[i].vid = 0; nspace_items[i].flags = NSPACE_ITEM_DONE; nspace_items[i].token = 0; - - wakeup((caddr_t)&(nspace_items[i].vp)); + + wakeup((caddr_t)&(nspace_items[i].vp)); } } } } } - + lck_mtx_unlock(&nspace_handler_lock); - + lck_mtx_lock(&nspace_handler_exclusion_lock); nspace_handlers[nspace_type].handler_busy = 0; lck_mtx_unlock(&nspace_handler_exclusion_lock); - + return error; } @@ -9192,23 +9768,18 @@ static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int { int error = 0; namespace_handler_data nhd; - + bzero (&nhd, sizeof(namespace_handler_data)); - if (nspace_type == NSPACE_HANDLER_SNAPSHOT && - (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { - return EINVAL; - } - if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { return error; } - + error = validate_namespace_args (is64bit, size); if (error) { return error; } - + /* Copy in the userland pointers into our kernel-only struct */ if (is64bit) { @@ -9227,13 +9798,13 @@ static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int } /* Otherwise the fields were pre-zeroed when we did the bzero above. */ } - } + } else { /* 32 bit userland structures */ nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token); nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags); nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr); - + if (size > (sizeof(user32_namespace_handler_info))) { if (size >= (sizeof(user32_namespace_handler_info_ext))) { nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr); @@ -9244,7 +9815,7 @@ static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int /* Otherwise the fields were pre-zeroed when we did the bzero above. */ } } - + return wait_for_namespace_event(&nhd, nspace_type); } @@ -9259,7 +9830,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long boolean_t is64bit; u_int size; #define STK_PARAMS 128 - char stkbuf[STK_PARAMS]; + char stkbuf[STK_PARAMS] = {0}; caddr_t data, memp; vnode_t vp = *arg_vp; @@ -9293,13 +9864,13 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } else { data = &stkbuf[0]; }; - + if (cmd & IOC_IN) { if (size) { error = copyin(udata, data, size); - if (error) { + if (error) { if (memp) { - kfree (memp, size); + kfree (memp, size); } return error; } @@ -9350,7 +9921,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long /* issue the sync for this volume */ (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL); - /* + /* * Then release the mount_iterref once we're done syncing; it's not * needed for the VNOP_IOCTL below */ @@ -9369,11 +9940,35 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } break; + case FSCTL_ROUTEFS_SETROUTEID: { +#if ROUTEFS + char routepath[MAXPATHLEN]; + size_t len = 0; + + if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { + break; + } + bzero(routepath, MAXPATHLEN); + error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len); + if (error) { + break; + } + error = routefs_kernel_mount(routepath); + if (error) { + break; + } +#endif + } + break; + case FSCTL_SET_PACKAGE_EXTS: { user_addr_t ext_strings; uint32_t num_entries; uint32_t max_width; + if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0))) + break; + if ( (is64bit && size != sizeof(user64_package_ext_info)) || (is64bit == 0 && size != sizeof(user32_package_ext_info))) { @@ -9397,7 +9992,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } break; - /* namespace handlers */ + /* namespace handlers */ case FSCTL_NAMESPACE_HANDLER_GET: { error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data); } @@ -9406,13 +10001,13 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long /* Snapshot handlers */ case FSCTL_OLD_SNAPSHOT_HANDLER_GET: { error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); - } + } break; case FSCTL_SNAPSHOT_HANDLER_GET_EXT: { error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); } - break; + break; case FSCTL_NAMESPACE_HANDLER_UPDATE: { uint32_t token, val; @@ -9453,10 +10048,10 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long if (error) { printf("nspace-handler-update: did not find token %u\n", token); } - } + } break; - - case FSCTL_NAMESPACE_HANDLER_UNBLOCK: { + + case FSCTL_NAMESPACE_HANDLER_UNBLOCK: { uint32_t token, val; int i; @@ -9501,7 +10096,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } lck_mtx_unlock(&nspace_handler_lock); - } + } break; case FSCTL_NAMESPACE_HANDLER_CANCEL: { @@ -9538,18 +10133,18 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long vnode_unlock(nspace_items[i].vp); } - nspace_items[i].vp = NULL; - nspace_items[i].arg = NULL; + nspace_items[i].vp = NULL; + nspace_items[i].arg = NULL; nspace_items[i].vid = 0; nspace_items[i].token = val; nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING; - nspace_items[i].flags |= NSPACE_ITEM_CANCELLED; + nspace_items[i].flags |= NSPACE_ITEM_CANCELLED; wakeup((caddr_t)&(nspace_items[i].vp)); } lck_mtx_unlock(&nspace_handler_lock); - } + } break; case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: { @@ -9565,7 +10160,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long lck_mtx_unlock(&nspace_handler_lock); printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp); - } + } break; case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS: @@ -9584,8 +10179,8 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } break; - case FSCTL_SET_FSTYPENAME_OVERRIDE: - { + case FSCTL_SET_FSTYPENAME_OVERRIDE: + { if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) { break; } @@ -9609,7 +10204,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long } } break; - + default: { /* Invoke the filesystem-specific code */ error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx); @@ -9621,13 +10216,13 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long * if no errors, copy any data to user. Size was * already set and checked above. */ - if (error == 0 && (cmd & IOC_OUT) && size) + if (error == 0 && (cmd & IOC_OUT) && size) error = copyout(data, udata, size); - + if (memp) { kfree(memp, size); } - + return error; } @@ -9636,7 +10231,7 @@ int fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval) { int error; - struct nameidata nd; + struct nameidata nd; u_long nameiflags; vnode_t vp = NULL; vfs_context_t ctx = vfs_context_current(); @@ -9678,7 +10273,7 @@ ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval) AUDIT_ARG(fd, uap->fd); AUDIT_ARG(cmd, uap->cmd); AUDIT_ARG(value32, uap->options); - + /* Get the vnode for the file we are getting info on: */ if ((error = file_vnode(uap->fd, &vp))) return error; @@ -9750,14 +10345,14 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval) /* * the specific check for 0xffffffff is a hack to preserve * binaray compatibilty in K64 with applications that discovered - * that passing in a buf pointer and a size of -1 resulted in + * that passing in a buf pointer and a size of -1 resulted in * just the size of the indicated extended attribute being returned. * this isn't part of the documented behavior, but because of the * original implemtation's check for "uap->size > 0", this behavior * was allowed. In K32 that check turned into a signed comparison * even though uap->size is unsigned... in K64, we blow by that * check because uap->size is unsigned and doesn't get sign smeared - * in the munger for a 32 bit user app. we also need to add a + * in the munger for a 32 bit user app. we also need to add a * check to limit the maximum size of the buffer being passed in... * unfortunately, the underlying fileystems seem to just malloc * the requested size even if the actual extended attribute is tiny. @@ -9774,7 +10369,7 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval) if (uap->value) { if (uap->size > (size_t)XATTR_MAXSIZE) uap->size = XATTR_MAXSIZE; - + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->value, uap->size); @@ -10122,7 +10717,7 @@ flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) return(error); } if (uap->namebuf != 0 && uap->bufsize > 0) { - auio = uio_createwithbuffer(1, 0, spacetype, + auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->namebuf, uap->bufsize); } @@ -10253,23 +10848,23 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) AUDIT_ARG(value32, fsid.val[0]); AUDIT_ARG(value64, uap->objid); /* Restrict output buffer size for now. */ - + if (uap->bufsize > PAGE_SIZE) { return (EINVAL); - } + } MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK); if (realpath == NULL) { return (ENOMEM); } error = fsgetpath_internal( - ctx, fsid.val[0], uap->objid, + ctx, fsid.val[0], uap->objid, uap->bufsize, realpath, &length); if (error) { goto out; } - + error = copyout((caddr_t)realpath, uap->buf, length); *retval = (user_ssize_t)length; /* may be superseded by error */ @@ -10288,8 +10883,8 @@ out: * EFAULT */ static int -munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, - user_addr_t bufp, int *sizep, boolean_t is_64_bit, +munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp, int *sizep, boolean_t is_64_bit, boolean_t partial_copy) { int error; @@ -10329,23 +10924,23 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, my_size = copy_size = sizeof(sfs); bzero(&sfs, my_size); - + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; sfs.f_type = mp->mnt_vtable->vfc_typenum; sfs.f_reserved1 = (short)sfsp->f_fssubtype; - + /* * It's possible for there to be more than 2^^31 blocks in the filesystem, so we * have to fudge the numbers here in that case. We inflate the blocksize in order * to reflect the filesystem size as best we can. */ - if ((sfsp->f_blocks > INT_MAX) - /* Hack for 4061702 . I think the real fix is for Carbon to + if ((sfsp->f_blocks > INT_MAX) + /* Hack for 4061702 . I think the real fix is for Carbon to * look for some volume capability and not depend on hidden - * semantics agreed between a FS and carbon. + * semantics agreed between a FS and carbon. * f_blocks, f_bfree, and f_bavail set to -1 is the trigger * for Carbon to set bNoVolumeSizes volume attribute. - * Without this the webdavfs files cannot be copied onto + * Without this the webdavfs files cannot be copied onto * disk as they look huge. This change should not affect * XSAN as they should not setting these to -1.. */ @@ -10401,7 +10996,7 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, } error = copyout((caddr_t)&sfs, bufp, copy_size); } - + if (sizep != NULL) { *sizep = my_size; } @@ -10594,3 +11189,534 @@ vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused return 0; } +/* + * gets the vnode associated with the (unnamed) snapshot directory + * for a Filesystem. The snapshot directory vnode is returned with + * an iocount on it. + */ +int +vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx) +{ + int error; + + error = VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx); + +#if CLONE_SNAPSHOT_FALLBACKS_ENABLED + if (error == ENOTSUP) { + struct nameidata snapnd; + + /* + * Temporary fallback to /.snaps lookup + * XXX: To be removed. + */ + NDINIT(&snapnd, LOOKUP, OP_LOOKUP, USEDVP, + UIO_SYSSPACE, CAST_USER_ADDR_T(".snaps"), ctx); + snapnd.ni_dvp = rvp; + + if ((error = namei(&snapnd))) { + error = ENOTSUP; + *sdvpp = NULLVP; + } else { + *sdvpp = snapnd.ni_vp; + nameidone(&snapnd); + } + } +#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */ + return (error); +} + +/* + * Get the snapshot vnode. + * + * If successful, the call returns with an iocount on *rvpp ,*sdvpp and + * needs nameidone() on ndp. + * + * If the snapshot vnode exists it is returned in ndp->ni_vp. + * + * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is + * not needed. + */ +static int +vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp, + user_addr_t name, struct nameidata *ndp, int32_t op, +#if !CONFIG_TRIGGERS + __unused +#endif + enum path_operation pathop, + vfs_context_t ctx) +{ + int error, i; + caddr_t name_buf; + size_t name_len; + struct vfs_attr vfa; + + *sdvpp = NULLVP; + *rvpp = NULLVP; + + error = vnode_getfromfd(ctx, dirfd, rvpp); + if (error) + return (error); + + if (!vnode_isvroot(*rvpp)) { + error = EINVAL; + goto out; + } + + /* Make sure the filesystem supports snapshots */ + VFSATTR_INIT(&vfa); + VFSATTR_WANTED(&vfa, f_capabilities); + if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) || + !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) || + !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & + VOL_CAP_INT_SNAPSHOT)) || + !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & + VOL_CAP_INT_SNAPSHOT))) { + error = ENOTSUP; + goto out; + } + + error = vnode_get_snapdir(*rvpp, sdvpp, ctx); + if (error) + goto out; + + MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK); + error = copyinstr(name, name_buf, MAXPATHLEN, &name_len); + if (error) + goto out1; + + /* + * Some sanity checks- name can't be empty, "." or ".." or have slashes. + * (the length returned by copyinstr includes the terminating NUL) + */ + if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') || + (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) { + error = EINVAL; + goto out1; + } + for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++); + if (i < (int)name_len) { + error = EINVAL; + goto out1; + } + +#if CONFIG_MACF + if (op == CREATE) { + error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp), + name_buf); + } else if (op == DELETE) { + error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp), + name_buf); + } + if (error) + goto out1; +#endif + + /* Check if the snapshot already exists ... */ + NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx); + ndp->ni_dvp = *sdvpp; + + error = namei(ndp); +out1: + FREE(name_buf, M_TEMP); +out: + if (error) { + if (*sdvpp) { + vnode_put(*sdvpp); + *sdvpp = NULLVP; + } + if (*rvpp) { + vnode_put(*rvpp); + *rvpp = NULLVP; + } + } + return (error); +} + +/* + * create a filesystem snapshot (for supporting filesystems) + * + * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL) + * We get to the (unnamed) snapshot directory vnode and create the vnode + * for the snapshot in it. + * + * Restrictions: + * + * a) Passed in name for snapshot cannot have slashes. + * b) name can't be "." or ".." + * + * Since this requires superuser privileges, vnode_authorize calls are not + * made. + */ +static int +snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags, + vfs_context_t ctx) +{ + vnode_t rvp, snapdvp; + int error; + struct nameidata namend; + + error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE, + OP_LINK, ctx); + if (error) + return (error); + + if (namend.ni_vp) { + vnode_put(namend.ni_vp); + error = EEXIST; + } else { + struct vnode_attr va; + vnode_t vp = NULLVP; + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, 0); + + error = vn_create(snapdvp, &vp, &namend, &va, + VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx); + if (!error && vp) + vnode_put(vp); +#if CLONE_SNAPSHOT_FALLBACKS_ENABLED + else if (error) { + error = VNOP_COPYFILE(rvp, rvp, NULLVP, &namend.ni_cnd, + 0, 0, ctx); + } +#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */ + } + + nameidone(&namend); + vnode_put(snapdvp); + vnode_put(rvp); + return (error); +} + +/* + * Delete a Filesystem snapshot + * + * get the vnode for the unnamed snapshot directory and the snapshot and + * delete the snapshot. + */ +static int +snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags, + vfs_context_t ctx) +{ + vnode_t rvp, snapdvp; + int error; + struct nameidata namend; + + error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE, + OP_UNLINK, ctx); + if (error) + goto out; + + error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd, + VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx); + + vnode_put(namend.ni_vp); + nameidone(&namend); + vnode_put(snapdvp); + vnode_put(rvp); +out: + return (error); +} + +/* + * Revert a filesystem to a snapshot + * + * Marks the filesystem to revert to the given snapshot on next mount. + */ +static int +snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags, + vfs_context_t ctx) +{ + int error; + vnode_t rvp; + mount_t mp; + struct fs_snapshot_revert_args revert_data; + struct componentname cnp; + caddr_t name_buf; + size_t name_len; + + error = vnode_getfromfd(ctx, dirfd, &rvp); + if (error) { + return (error); + } + mp = vnode_mount(rvp); + + /* + * Grab mount_iterref so that we can release the vnode, + * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync. + */ + error = mount_iterref (mp, 0); + vnode_put(rvp); + if (error) { + return (error); + } + + MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK); + error = copyinstr(name, name_buf, MAXPATHLEN, &name_len); + if (error) { + mount_iterdrop(mp); + FREE(name_buf, M_TEMP); + return (error); + } + + memset(&cnp, 0, sizeof(cnp)); + cnp.cn_pnbuf = (char *)name_buf; + cnp.cn_nameiop = LOOKUP; + cnp.cn_flags = ISLASTCN | HASBUF; + cnp.cn_pnlen = MAXPATHLEN; + cnp.cn_nameptr = cnp.cn_pnbuf; + cnp.cn_namelen = (int)name_len; + revert_data.sr_cnp = &cnp; + + error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx); + mount_iterdrop(mp); + FREE(name_buf, M_TEMP); + + if (error) { + /* If there was any error, try again using VNOP_IOCTL */ + + vnode_t snapdvp; + struct nameidata namend; + + error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP, + OP_LOOKUP, ctx); + if (error) { + return (error); + } + + +#ifndef APFSIOC_REVERT_TO_SNAPSHOT +#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t) +#endif + +#ifndef APFS_REVERT_TO_SNAPSHOT +#define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT) +#endif + + error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL, + 0, ctx); + + vnode_put(namend.ni_vp); + nameidone(&namend); + vnode_put(snapdvp); + vnode_put(rvp); + } + + return (error); +} + +/* + * rename a Filesystem snapshot + * + * get the vnode for the unnamed snapshot directory and the snapshot and + * rename the snapshot. This is a very specialised (and simple) case of + * rename(2) (which has to deal with a lot more complications). It differs + * slightly from rename(2) in that EEXIST is returned if the new name exists. + */ +static int +snapshot_rename(int dirfd, user_addr_t old, user_addr_t new, + __unused uint32_t flags, vfs_context_t ctx) +{ + vnode_t rvp, snapdvp; + int error, i; + caddr_t newname_buf; + size_t name_len; + vnode_t fvp; + struct nameidata *fromnd, *tond; + /* carving out a chunk for structs that are too big to be on stack. */ + struct { + struct nameidata from_node; + struct nameidata to_node; + } * __rename_data; + + MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK); + fromnd = &__rename_data->from_node; + tond = &__rename_data->to_node; + + error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE, + OP_UNLINK, ctx); + if (error) + goto out; + fvp = fromnd->ni_vp; + + MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK); + error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len); + if (error) + goto out1; + + /* + * Some sanity checks- new name can't be empty, "." or ".." or have + * slashes. + * (the length returned by copyinstr includes the terminating NUL) + * + * The FS rename VNOP is suppossed to handle this but we'll pick it + * off here itself. + */ + if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') || + (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) { + error = EINVAL; + goto out1; + } + for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++); + if (i < (int)name_len) { + error = EINVAL; + goto out1; + } + +#if CONFIG_MACF + error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp), + newname_buf); + if (error) + goto out1; +#endif + + NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2, + UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx); + tond->ni_dvp = snapdvp; + + error = namei(tond); + if (error) { + goto out2; + } else if (tond->ni_vp) { + /* + * snapshot rename behaves differently than rename(2) - if the + * new name exists, EEXIST is returned. + */ + vnode_put(tond->ni_vp); + error = EEXIST; + goto out2; + } + + error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP, + &tond->ni_cnd, ctx); + +out2: + nameidone(tond); +out1: + FREE(newname_buf, M_TEMP); + vnode_put(fvp); + vnode_put(snapdvp); + vnode_put(rvp); + nameidone(fromnd); +out: + FREE(__rename_data, M_TEMP); + return (error); +} + +/* + * Mount a Filesystem snapshot + * + * get the vnode for the unnamed snapshot directory and the snapshot and + * mount the snapshot. + */ +static int +snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory, + user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx) +{ + vnode_t rvp, snapdvp, snapvp, vp, pvp; + int error; + struct nameidata *snapndp, *dirndp; + /* carving out a chunk for structs that are too big to be on stack. */ + struct { + struct nameidata snapnd; + struct nameidata dirnd; + } * __snapshot_mount_data; + + MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data), + M_TEMP, M_WAITOK); + snapndp = &__snapshot_mount_data->snapnd; + dirndp = &__snapshot_mount_data->dirnd; + + error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP, + OP_LOOKUP, ctx); + if (error) + goto out; + + snapvp = snapndp->ni_vp; + if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) { + error = EIO; + goto out1; + } + + /* Get the vnode to be covered */ + NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, + UIO_USERSPACE, directory, ctx); + error = namei(dirndp); + if (error) + goto out1; + + vp = dirndp->ni_vp; + pvp = dirndp->ni_dvp; + + if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) { + error = EINVAL; + } else { + mount_t mp = vnode_mount(rvp); + struct fs_snapshot_mount_args smnt_data; + + smnt_data.sm_mp = mp; + smnt_data.sm_cnp = &snapndp->ni_cnd; + error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp, + &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0, + KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx); + if (error) { + /* Retry with user passed args */ + error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, + vp, &dirndp->ni_cnd, CAST_USER_ADDR_T(mnt_data), 0, + 0, NULL, FALSE, ctx); + } + } + + vnode_put(vp); + vnode_put(pvp); + nameidone(dirndp); +out1: + vnode_put(snapvp); + vnode_put(snapdvp); + vnode_put(rvp); + nameidone(snapndp); +out: + FREE(__snapshot_mount_data, M_TEMP); + return (error); +} + +/* + * FS snapshot operations dispatcher + */ +int +fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap, + __unused int32_t *retval) +{ + int error; + vfs_context_t ctx = vfs_context_current(); + + error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0); + if (error) + return (error); + + switch (uap->op) { + case SNAPSHOT_OP_CREATE: + error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx); + break; + case SNAPSHOT_OP_DELETE: + error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx); + break; + case SNAPSHOT_OP_RENAME: + error = snapshot_rename(uap->dirfd, uap->name1, uap->name2, + uap->flags, ctx); + break; + case SNAPSHOT_OP_MOUNT: + error = snapshot_mount(uap->dirfd, uap->name1, uap->name2, + uap->data, uap->flags, ctx); + break; + case SNAPSHOT_OP_REVERT: + error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx); + break; + default: + error = ENOSYS; + } + + return (error); +}