+ error = chmod_vnode(vfs_context_current(), vp, vap);
+ (void)vnode_put(vp);
+ file_drop(fd);
+
+ return (error);
+}
+
+/*
+ * fchmod_extended: Change mode of a file given a file descriptor; with
+ * extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting to change file mode
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->mode File mode to set (same as 'chmod')
+ * uap->uid UID to set
+ * uap->gid GID to set
+ * uap->xsecurity ACL to set (or delete)
+ * uap->fd File descriptor of file to change mode
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
+{
+ int error;
+ struct vnode_attr va;
+ kauth_filesec_t xsecdst;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ VATTR_INIT(&va);
+ if (uap->mode != -1)
+ VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+
+ xsecdst = NULL;
+ switch(uap->xsecurity) {
+ case USER_ADDR_NULL:
+ VATTR_SET(&va, va_acl, NULL);
+ break;
+ case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
+ VATTR_SET(&va, va_acl, NULL);
+ break;
+ /* not being set */
+ case CAST_USER_ADDR_T(-1):
+ break;
+ default:
+ if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return(error);
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+ }
+
+ error = fchmod1(p, uap->fd, &va);
+
+
+ switch(uap->xsecurity) {
+ case USER_ADDR_NULL:
+ case CAST_USER_ADDR_T(-1):
+ break;
+ default:
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+ }
+ return(error);
+}
+
+ /*
+  * fchmod: change the mode of the file behind a descriptor.  Only the bits
+  * covered by ALLPERMS are forwarded to fchmod1().
+  */
+ int
+ fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
+ {
+ 	struct vnode_attr mode_attr;
+
+ 	VATTR_INIT(&mode_attr);
+ 	VATTR_SET(&mode_attr, va_mode, uap->mode & ALLPERMS);
+
+ 	return fchmod1(p, uap->fd, &mode_attr);
+ }
+
+
+/*
+ * Set ownership given a path name.
+ */
+/* ARGSUSED */
+static int
+fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
+ gid_t gid, int flag, enum uio_seg segflg)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ int error;
+ struct nameidata nd;
+ int follow;
+ kauth_action_t action;
+
+ AUDIT_ARG(owner, uid, gid);
+
+ follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
+ path, ctx);
+ error = nameiat(&nd, fd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ VATTR_INIT(&va);
+ if (uid != (uid_t)VNOVAL)
+ VATTR_SET(&va, va_uid, uid);
+ if (gid != (gid_t)VNOVAL)
+ VATTR_SET(&va, va_gid, gid);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_setowner(ctx, vp, uid, gid);
+ if (error)
+ goto out;
+#endif
+
+ /* preflight and authorize attribute changes */
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
+ goto out;
+ error = vnode_setattr(vp, &va, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_setowner(ctx, vp, uid, gid);
+#endif
+
+out:
+ /*
+ * EACCES is only allowed from namei(); permissions failure should
+ * return EPERM, so we need to translate the error code.
+ */
+ if (error == EACCES)
+ error = EPERM;
+
+ vnode_put(vp);
+ return (error);
+}
+
+ /*
+  * chown: set ownership of the file named by uap->path, resolved relative
+  * to the current working directory with symlinks followed.
+  */
+ int
+ chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
+ {
+ 	vfs_context_t ctx = vfs_context_current();
+
+ 	return fchownat_internal(ctx, AT_FDCWD, uap->path, uap->uid,
+ 	    uap->gid, 0, UIO_USERSPACE);
+ }
+
+ /*
+  * lchown: like chown, but the lookup is done with AT_SYMLINK_NOFOLLOW so a
+  * trailing symbolic link is not followed.
+  */
+ int
+ lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
+ {
+ 	vfs_context_t ctx = vfs_context_current();
+
+ 	return fchownat_internal(ctx, AT_FDCWD, uap->path, uap->owner,
+ 	    uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE);
+ }
+
+ /*
+  * fchownat: set ownership of uap->path resolved relative to uap->fd.
+  * AT_SYMLINK_NOFOLLOW is the only flag accepted; anything else is EINVAL.
+  */
+ int
+ fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
+ {
+ 	vfs_context_t ctx;
+
+ 	if ((uap->flag & ~AT_SYMLINK_NOFOLLOW) != 0) {
+ 		return EINVAL;
+ 	}
+
+ 	ctx = vfs_context_current();
+ 	return fchownat_internal(ctx, uap->fd, uap->path, uap->uid,
+ 	    uap->gid, uap->flag, UIO_USERSPACE);
+ }
+
+/*
+ * Set ownership given a file descriptor.
+ */
+/* ARGSUSED */
+int
+fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t vp;
+ int error;
+ kauth_action_t action;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return (error);
+
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ VATTR_INIT(&va);
+ if (uap->uid != VNOVAL)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != VNOVAL)
+ VATTR_SET(&va, va_gid, uap->gid);
+
+#if NAMEDSTREAMS
+ /* chown calls are not allowed for resource forks. */
+ if (vp->v_flag & VISNAMEDSTREAM) {
+ error = EPERM;
+ goto out;
+ }
+#endif
+
+#if CONFIG_MACF
+ error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
+ if (error)
+ goto out;
+#endif
+
+ /* preflight and authorize attribute changes */
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
+ if (error == EACCES)
+ error = EPERM;
+ goto out;
+ }
+ error = vnode_setattr(vp, &va, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
+#endif
+
+out:
+ (void)vnode_put(vp);
+ file_drop(uap->fd);
+ return (error);
+}
+
+ /*
+  * getutimes: fill tsp[0] (access) and tsp[1] (modification) for utimes().
+  *
+  * With a NULL user pointer both entries receive the current time;
+  * otherwise two timevals are copied in using the layout that matches the
+  * calling process (64-bit or 32-bit) and converted to timespecs.
+  *
+  * Returns 0 on success or the copyin() error.
+  */
+ static int
+ getutimes(user_addr_t usrtvp, struct timespec *tsp)
+ {
+ 	int error;
+
+ 	if (usrtvp == USER_ADDR_NULL) {
+ 		struct timeval now;
+
+ 		/* XXX Y2038 bug because of microtime argument */
+ 		microtime(&now);
+ 		TIMEVAL_TO_TIMESPEC(&now, &tsp[0]);
+ 		tsp[1] = tsp[0];
+ 		return 0;
+ 	}
+
+ 	if (IS_64BIT_PROCESS(current_proc())) {
+ 		struct user64_timeval tv64[2];
+
+ 		error = copyin(usrtvp, (void *)tv64, sizeof(tv64));
+ 		if (error) {
+ 			return error;
+ 		}
+ 		TIMEVAL_TO_TIMESPEC(&tv64[0], &tsp[0]);
+ 		TIMEVAL_TO_TIMESPEC(&tv64[1], &tsp[1]);
+ 	} else {
+ 		struct user32_timeval tv32[2];
+
+ 		error = copyin(usrtvp, (void *)tv32, sizeof(tv32));
+ 		if (error) {
+ 			return error;
+ 		}
+ 		TIMEVAL_TO_TIMESPEC(&tv32[0], &tsp[0]);
+ 		TIMEVAL_TO_TIMESPEC(&tv32[1], &tsp[1]);
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * setutimes: apply ts[0] as the access time and ts[1] as the modification
+  * time of 'vp'.
+  *
+  * 'nullflag' is non-zero when the caller supplied no times; it sets
+  * VA_UTIMES_NULL on the request and suppresses the EACCES -> EPERM
+  * translation applied to authorization failures.
+  *
+  * The caller keeps ownership of its reference on 'vp'.
+  */
+ static int
+ setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
+     int nullflag)
+ {
+ 	struct vnode_attr va;
+ 	kauth_action_t action;
+ 	int error;
+
+ 	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ 	VATTR_INIT(&va);
+ 	VATTR_SET(&va, va_access_time, ts[0]);
+ 	VATTR_SET(&va, va_modify_time, ts[1]);
+ 	if (nullflag) {
+ 		va.va_vaflags |= VA_UTIMES_NULL;
+ 	}
+
+ #if NAMEDSTREAMS
+ 	/* resource forks may not have their times set */
+ 	if (vp->v_flag & VISNAMEDSTREAM) {
+ 		return EPERM;
+ 	}
+ #endif
+
+ #if CONFIG_MACF
+ 	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
+ 	if (error != 0) {
+ 		return error;
+ 	}
+ #endif
+
+ 	/* preflight; authorize only when the preflight asks for an action */
+ 	error = vnode_authattr(vp, &va, &action, ctx);
+ 	if (error == 0 && action != 0) {
+ 		error = vnode_authorize(vp, NULL, action, ctx);
+ 	}
+ 	if (error != 0) {
+ 		if (!nullflag && error == EACCES) {
+ 			error = EPERM;
+ 		}
+ 		return error;
+ 	}
+
+ 	error = vnode_setattr(vp, &va, ctx);
+
+ #if CONFIG_MACF
+ 	if (error == 0) {
+ 		mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
+ 	}
+ #endif
+
+ 	return error;
+ }
+
+/*
+ * Set the access and modification times of a file.
+ */
+/* ARGSUSED */
+int
+utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
+{
+ struct timespec ts[2];
+ user_addr_t usrtvp;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ /*
+ * AUDIT: Needed to change the order of operations to do the
+ * name lookup first because auditing wants the path.
+ */
+ NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ nameidone(&nd);
+
+ /*
+ * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
+ * the current time instead.
+ */
+ usrtvp = uap->tptr;
+ if ((error = getutimes(usrtvp, ts)) != 0)
+ goto out;
+
+ error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
+
+out:
+ vnode_put(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Set the access and modification times of a file.
+ */
+/* ARGSUSED */
+int
+futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
+{
+ struct timespec ts[2];
+ vnode_t vp;
+ user_addr_t usrtvp;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ usrtvp = uap->tptr;
+ if ((error = getutimes(usrtvp, ts)) != 0)
+ return (error);
+ if ((error = file_vnode(uap->fd, &vp)) != 0)
+ return (error);
+ if((error = vnode_getwithref(vp))) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
+ vnode_put(vp);
+ file_drop(uap->fd);
+ return(error);
+}
+
+/*
+ * Truncate a file given its path name.
+ */
+/* ARGSUSED */
+int
+truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+ kauth_action_t action;
+
+ if (uap->length < 0)
+ return(EINVAL);
+ NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ if ((error = namei(&nd)))
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_data_size, uap->length);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_truncate(ctx, NOCRED, vp);
+ if (error)
+ goto out;
+#endif
+
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
+ goto out;
+ error = vnode_setattr(vp, &va, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_truncate(ctx, NOCRED, vp);
+#endif
+
+out:
+ vnode_put(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ */
+/* ARGSUSED */
+int
+ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ struct vnode_attr va;
+ vnode_t vp;
+ struct fileproc *fp;
+ int error ;
+ int fd = uap->fd;
+
+ AUDIT_ARG(fd, uap->fd);
+ if (uap->length < 0)
+ return(EINVAL);
+
+ if ( (error = fp_lookup(p,fd,&fp,0)) ) {
+ return(error);
+ }
+
+ switch (FILEGLOB_DTYPE(fp->f_fglob)) {
+ case DTYPE_PSXSHM:
+ error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
+ goto out;
+ case DTYPE_VNODE:
+ break;
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+ vp = (vnode_t)fp->f_fglob->fg_data;
+
+ if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((error = vnode_getwithref(vp)) != 0) {
+ goto out;
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_truncate(ctx,
+ fp->f_fglob->fg_cred, vp);
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_data_size, uap->length);
+ error = vnode_setattr(vp, &va, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
+#endif
+
+ (void)vnode_put(vp);
+out:
+ file_drop(fd);
+ return (error);
+}
+
+
+/*
+ * Sync an open file with synchronized I/O _file_ integrity completion
+ */
+/* ARGSUSED */
+int
+fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
+{
+ __pthread_testcancel(1);
+ return(fsync_common(p, uap, MNT_WAIT));
+}
+
+
+/*
+ * Sync an open file with synchronized I/O _file_ integrity completion
+ *
+ * Notes: This is a legacy support function that does not test for
+ * thread cancellation points.
+ */
+/* ARGSUSED */
+int
+fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
+{
+ return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
+}
+
+
+/*
+ * Sync an open file with synchronized I/O _data_ integrity completion
+ */
+/* ARGSUSED */
+int
+fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
+{
+ __pthread_testcancel(1);
+ return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
+}
+
+
+/*
+ * fsync_common
+ *
+ * Common fsync code to support both synchronized I/O file integrity completion
+ * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
+ *
+ * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
+ * will only guarantee that the file data contents are retrievable. If
+ * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
+ * includes additional metadata unnecessary for retrieving the file data
+ * contents, such as atime, mtime, ctime, etc., also be committed to stable
+ * storage.
+ *
+ * Parameters: p The process
+ * uap->fd The descriptor to synchronize
+ * flags The data integrity flags
+ *
+ * Returns: int Success
+ * fp_getfvp:EBADF Bad file descriptor
+ * fp_getfvp:ENOTSUP fd does not refer to a vnode
+ * VNOP_FSYNC:??? unspecified
+ *
+ * Notes: We use struct fsync_args because it is a short name, and all
+ * caller argument structures are otherwise identical.
+ */
+static int
+fsync_common(proc_t p, struct fsync_args *uap, int flags)
+{
+ vnode_t vp;
+ struct fileproc *fp;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
+ return (error);
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ error = VNOP_FSYNC(vp, flags, ctx);
+
+#if NAMEDRSRCFORK
+ /* Sync resource fork shadow file if necessary. */
+ if ((error == 0) &&
+ (vp->v_flag & VISNAMEDSTREAM) &&
+ (vp->v_parent != NULLVP) &&
+ vnode_isshadow(vp) &&
+ (fp->f_flags & FP_WRITTEN)) {
+ (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
+ }
+#endif
+
+ (void)vnode_put(vp);
+ file_drop(uap->fd);
+ return (error);
+}
+
+/*
+ * Duplicate files. Source must be a file, target must be a file or
+ * must not exist.
+ *
+ * XXX Copyfile authorisation checking is woefully inadequate, and will not
+ * perform inheritance correctly.
+ */
+/* ARGSUSED */
+int
+copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
+{
+ vnode_t tvp, fvp, tdvp, sdvp;
+ struct nameidata fromnd, tond;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+#if CONFIG_MACF
+ struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
+ struct vnode_attr va;
+#endif
+
+ /* Check that the flags are valid. */
+
+ if (uap->flags & ~CPF_MASK) {
+ return(EINVAL);
+ }
+
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
+ UIO_USERSPACE, uap->from, ctx);
+ if ((error = namei(&fromnd)))
+ return (error);
+ fvp = fromnd.ni_vp;
+
+ NDINIT(&tond, CREATE, OP_LINK,
+ LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+ UIO_USERSPACE, uap->to, ctx);
+ if ((error = namei(&tond))) {
+ goto out1;
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+
+ if (tvp != NULL) {
+ if (!(uap->flags & CPF_OVERWRITE)) {
+ error = EEXIST;
+ goto out;
+ }
+ }
+
+ if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
+ error = EISDIR;
+ goto out;
+ }
+
+ /* This calls existing MAC hooks for open */
+ if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
+ NULL))) {
+ goto out;
+ }
+
+ if (tvp) {
+ /*
+ * See unlinkat_internal for an explanation of the potential
+ * ENOENT from the MAC hook but the gist is that the MAC hook
+ * can fail because vn_getpath isn't able to return the full
+ * path. We choose to ignore this failure.
+ */
+ error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
+ if (error && error != ENOENT)
+ goto out;
+ error = 0;
+ }
+
+#if CONFIG_MACF
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_type, fvp->v_type);
+ /* Mask off all but regular access permissions */
+ VATTR_SET(&va, va_mode,
+ ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
+ error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
+ if (error)
+ goto out;
+#endif /* CONFIG_MACF */
+
+ if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out;
+
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number) then there is nothing to do.
+ * (fixed to have POSIX semantics - CSM 3/2/98)
+ */
+ if (fvp == tvp)
+ error = -1;
+ if (!error)
+ error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
+out:
+ sdvp = tond.ni_startdir;
+ /*
+ * nameidone has to happen before we vnode_put(tdvp)
+ * since it may need to release the fs_nodelock on the tdvp
+ */
+ nameidone(&tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ vnode_put(sdvp);
+out1:
+ vnode_put(fvp);
+
+ nameidone(&fromnd);
+
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+ #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1 /* NOTE(review): no use of this macro is visible in this part of the file */
+
+ /*
+ * Helper function for doing clones. The caller is expected to provide an
+ * iocounted source vnode and release it.
+ *
+ * Parameters: fvp Source vnode; must be VLNK, VREG or VDIR
+ * data_read_authorised When TRUE, KAUTH_VNODE_READ_DATA is left out of the
+ * read authorization performed on the source
+ * dst_dirfd Directory descriptor the destination path is resolved against
+ * dst User path of the destination
+ * flags CLONE_NOFOLLOW selects a NOFOLLOW lookup of the destination
+ * ctx Context used for lookup, authorization and the VNOPs
+ *
+ * Returns: 0 Success
+ * EINVAL Source is not VLNK/VREG/VDIR, or is a directory that is a
+ * volume root, a mount point or has something mounted on it
+ * EEXIST Destination already exists
+ * EXDEV Destination directory is on a different mount than the source
+ * !0 Any error from lookup, authorization, attribute preparation
+ * or VNOP_CLONEFILE
+ */
+ static int
+ clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
+ user_addr_t dst, uint32_t flags, vfs_context_t ctx)
+ {
+ vnode_t tvp, tdvp;
+ struct nameidata tond;
+ int error;
+ int follow;
+ boolean_t free_src_acl;
+ boolean_t attr_cleanup;
+ enum vtype v_type;
+ kauth_action_t action;
+ struct componentname *cnp;
+ uint32_t defaulted;
+ struct vnode_attr va;
+ struct vnode_attr nva;
+
+ v_type = vnode_vtype(fvp);
+ switch (v_type) {
+ case VLNK:
+ /* FALLTHRU */
+ case VREG:
+ action = KAUTH_VNODE_ADD_FILE;
+ break;
+ case VDIR:
+ if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
+ fvp->v_mountedhere) {
+ return (EINVAL);
+ }
+ action = KAUTH_VNODE_ADD_SUBDIRECTORY;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ AUDIT_ARG(fd2, dst_dirfd);
+ AUDIT_ARG(value32, flags);
+
+ follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
+ UIO_USERSPACE, dst, ctx);
+ if ((error = nameiat(&tond, dst_dirfd)))
+ return (error);
+ cnp = &tond.ni_cnd;
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+
+ free_src_acl = FALSE; /* set once va.va_acl holds an ACL that 'out' must free */
+ attr_cleanup = FALSE; /* set once vn_attribute_prepare() succeeded on nva */
+
+ if (tvp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ if (vnode_mount(tdvp) != vnode_mount(fvp)) {
+ error = EXDEV;
+ goto out;
+ }
+
+ #if CONFIG_MACF
+ if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
+ goto out;
+ #endif
+ if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
+ goto out;
+
+ action = KAUTH_VNODE_GENERIC_READ_BITS;
+ if (data_read_authorised)
+ action &= ~KAUTH_VNODE_READ_DATA;
+ if ((error = vnode_authorize(fvp, NULL, action, ctx)))
+ goto out;
+
+ /*
+ * certain attributes may need to be changed from the source, we ask for
+ * those here.
+ */
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ VATTR_WANTED(&va, va_gid);
+ VATTR_WANTED(&va, va_mode);
+ VATTR_WANTED(&va, va_flags);
+ VATTR_WANTED(&va, va_acl);
+
+ if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
+ goto out;
+
+ VATTR_INIT(&nva);
+ VATTR_SET(&nva, va_type, v_type);
+ if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
+ VATTR_SET(&nva, va_acl, va.va_acl);
+ free_src_acl = TRUE;
+ }
+
+ /* Handle ACL inheritance, initialize vap.
+ * NOTE(review): in the VLNK branch the result of vnode_authattr_new() is
+ * never tested; it is overwritten by VNOP_CLONEFILE below. Confirm that
+ * ignoring it is intended.
+ */
+ if (v_type == VLNK) {
+ error = vnode_authattr_new(tdvp, &nva, 0, ctx);
+ } else {
+ error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
+ if (error)
+ goto out;
+ attr_cleanup = TRUE;
+ }
+
+ /*
+ * We've got initial values for all security parameters,
+ * If we are superuser, then we can change owners to be the
+ * same as the source. Both superuser and the owner have default
+ * WRITE_SECURITY privileges so all other fields can be taken
+ * from source as well.
+ */
+ if (vfs_context_issuser(ctx)) {
+ if (VATTR_IS_SUPPORTED(&va, va_uid))
+ VATTR_SET(&nva, va_uid, va.va_uid);
+ if (VATTR_IS_SUPPORTED(&va, va_gid))
+ VATTR_SET(&nva, va_gid, va.va_gid);
+ }
+ if (VATTR_IS_SUPPORTED(&va, va_mode))
+ VATTR_SET(&nva, va_mode, va.va_mode);
+ if (VATTR_IS_SUPPORTED(&va, va_flags)) {
+ VATTR_SET(&nva, va_flags,
+ ((va.va_flags & ~SF_RESTRICTED) | /* Turn off from source */
+ (nva.va_flags & SF_RESTRICTED)));
+ }
+
+ error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva,
+ VNODE_CLONEFILE_DEFAULT, ctx);
+
+ if (!error && tvp) {
+ int update_flags = 0;
+ #if CONFIG_FSE
+ int fsevent;
+ #endif /* CONFIG_FSE */
+
+ #if CONFIG_MACF
+ (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
+ VNODE_LABEL_CREATE, ctx);
+ #endif
+ /*
+ * If some of the requested attributes weren't handled by the
+ * VNOP, use our fallback code.
+ * NOTE(review): the test below inspects 'va' (the attributes read
+ * from the source) while the fallback is applied with 'nva' (the
+ * attributes handed to the VNOP) - confirm which one is intended.
+ */
+ if (!VATTR_ALL_SUPPORTED(&va))
+ (void)vnode_setattr_fallback(tvp, &nva, ctx);
+
+ // Make sure the name & parent pointers are hooked up
+ if (tvp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (tvp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags) {
+ (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
+ cnp->cn_namelen, cnp->cn_hash, update_flags);
+ }
+
+ #if CONFIG_FSE
+ switch (vnode_vtype(tvp)) {
+ case VLNK:
+ /* FALLTHRU */
+ case VREG:
+ fsevent = FSE_CREATE_FILE;
+ break;
+ case VDIR:
+ fsevent = FSE_CREATE_DIR;
+ break;
+ default:
+ goto out; /* no fsevent is posted for any other vnode type */
+ }
+
+ if (need_fsevent(fsevent, tvp)) {
+ add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
+ FSE_ARG_DONE);
+ }
+ #endif /* CONFIG_FSE */
+ }
+
+ out:
+ if (attr_cleanup)
+ vn_attribute_cleanup(&nva, defaulted);
+ if (free_src_acl && va.va_acl)
+ kauth_acl_free(va.va_acl);
+ nameidone(&tond);
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ return (error);
+ }
+
+/*
+ * clone files or directories, target must not exist.
+ */
+/* ARGSUSED */
+int
+clonefileat(__unused proc_t p, struct clonefileat_args *uap,
+ __unused int32_t *retval)
+{
+ vnode_t fvp;
+ struct nameidata fromnd;
+ int follow;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ /* Check that the flags are valid. */
+ if (uap->flags & ~CLONE_NOFOLLOW)
+ return (EINVAL);
+
+ AUDIT_ARG(fd, uap->src_dirfd);
+
+ follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
+ UIO_USERSPACE, uap->src, ctx);
+ if ((error = nameiat(&fromnd, uap->src_dirfd)))
+ return (error);
+
+ fvp = fromnd.ni_vp;
+ nameidone(&fromnd);
+
+ error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
+ uap->flags, ctx);
+
+ vnode_put(fvp);
+ return (error);
+}
+
+ /*
+  * fclonefileat: clone the file behind descriptor uap->src_fd to uap->dst
+  * (resolved relative to uap->dst_dirfd).  The target must not exist.
+  *
+  * Indirect:	uap->src_fd	Descriptor of the source; must be open for reading
+  *		uap->dst_dirfd	Directory descriptor for resolving uap->dst
+  *		uap->dst	User path of the destination
+  *		uap->flags	CLONE_NOFOLLOW is the only flag accepted
+  *
+  * Returns:	0		Success
+  *		EINVAL		Flag bits other than CLONE_NOFOLLOW are set
+  *		EBADF		Source descriptor was not opened with FREAD
+  *		!0		Any error from fp_getfvp(), vnode_getwithref()
+  *				or clonefile_internal()
+  */
+ int
+ fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
+     __unused int32_t *retval)
+ {
+ 	vnode_t fvp;
+ 	struct fileproc *fp;
+ 	int error;
+ 	vfs_context_t ctx = vfs_context_current();
+
+ 	/*
+ 	 * Check that the flags are valid, exactly as clonefileat() does;
+ 	 * unknown bits used to be accepted silently on this entry point.
+ 	 */
+ 	if (uap->flags & ~CLONE_NOFOLLOW)
+ 		return (EINVAL);
+
+ 	AUDIT_ARG(fd, uap->src_fd);
+ 	error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
+ 	if (error)
+ 		return (error);
+
+ 	/* the source descriptor must have been opened for reading */
+ 	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ 		AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
+ 		error = EBADF;
+ 		goto out;
+ 	}
+
+ 	if ((error = vnode_getwithref(fvp)))
+ 		goto out;
+
+ 	AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
+
+ 	/*
+ 	 * TRUE: the FREAD check above already established data-read access,
+ 	 * so clonefile_internal() skips KAUTH_VNODE_READ_DATA on the source.
+ 	 */
+ 	error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
+ 	    uap->flags, ctx);
+
+ 	vnode_put(fvp);
+ out:
+ 	file_drop(uap->src_fd);
+ 	return (error);
+ }
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ */
+/* ARGSUSED */
+static int
+renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
+ int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
+{
+ if (flags & ~VFS_RENAME_FLAGS_MASK)
+ return EINVAL;
+
+ if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
+ return EINVAL;
+
+ vnode_t tvp, tdvp;
+ vnode_t fvp, fdvp;
+ struct nameidata *fromnd, *tond;
+ int error;
+ int do_retry;
+ int retry_count;
+ int mntrename;
+ int need_event;
+ const char *oname = NULL;
+ char *from_name = NULL, *to_name = NULL;
+ int from_len=0, to_len=0;
+ int holding_mntlock;
+ mount_t locked_mp = NULL;
+ vnode_t oparent = NULLVP;
+#if CONFIG_FSE
+ fse_info from_finfo, to_finfo;
+#endif
+ int from_truncated=0, to_truncated;
+ int batched = 0;
+ struct vnode_attr *fvap, *tvap;
+ int continuing = 0;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata from_node, to_node;
+ struct vnode_attr fv_attr, tv_attr;
+ } * __rename_data;
+ MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ fromnd = &__rename_data->from_node;
+ tond = &__rename_data->to_node;
+
+ holding_mntlock = 0;
+ do_retry = 0;
+ retry_count = 0;
+retry:
+ fvp = tvp = NULL;
+ fdvp = tdvp = NULL;
+ fvap = tvap = NULL;
+ mntrename = FALSE;
+
+ NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
+ segflg, from, ctx);
+ fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
+
+ NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+ segflg, to, ctx);
+ tond->ni_flag = NAMEI_COMPOUNDRENAME;
+
+continue_lookup:
+ if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+ if ( (error = nameiat(fromnd, fromfd)) )
+ goto out1;
+ fdvp = fromnd->ni_dvp;
+ fvp = fromnd->ni_vp;
+
+ if (fvp && fvp->v_type == VDIR)
+ tond->ni_cnd.cn_flags |= WILLBEDIR;
+ }
+
+ if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+ if ( (error = nameiat(tond, tofd)) ) {
+ /*
+ * Translate error code for rename("dir1", "dir2/.").
+ */
+ if (error == EISDIR && fvp->v_type == VDIR)
+ error = EINVAL;
+ goto out1;
+ }
+ tdvp = tond->ni_dvp;
+ tvp = tond->ni_vp;
+ }
+
+#if DEVELOPMENT || DEBUG
+ /*
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out1;
+ }
+
+ if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out1;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+ if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
+ error = ENOENT;
+ goto out1;
+ }
+
+ if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
+ error = EEXIST;
+ goto out1;
+ }
+
+ batched = vnode_compound_rename_available(fdvp);
+ if (!fvp) {
+ /*
+ * Claim: this check will never reject a valid rename.
+ * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
+ * Suppose fdvp and tdvp are not on the same mount.
+ * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
+ * then you can't move it to within another dir on the same mountpoint.
+ * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
+ *
+ * If this check passes, then we are safe to pass these vnodes to the same FS.
+ */
+ if (fdvp->v_mount != tdvp->v_mount) {
+ error = EXDEV;
+ goto out1;
+ }
+ goto skipped_lookup;
+ }
+
+ if (!batched) {
+ error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
+ if (error) {
+ if (error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * We encountered a race where after doing the namei, tvp stops
+ * being valid. If so, simply re-drive the rename call from the
+ * top.
+ */
+ do_retry = 1;
+ retry_count += 1;
+ }
+ }
+ goto out1;
+ }
+ }
+
+ /*
+ * If the source and destination are the same (i.e. they're
+ * links to the same vnode) and the target file system is
+ * case sensitive, then there is nothing to do.
+ *
+ * XXX Come back to this.
+ */
+ if (fvp == tvp) {
+ int pathconf_val;
+
+ /*
+ * Note: if _PC_CASE_SENSITIVE selector isn't supported,
+ * then assume that this file system is case sensitive.
+ */
+ if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
+ pathconf_val != 0) {
+ goto out1;
+ }
+ }
+
+ /*
+ * Allow the renaming of mount points.
+ * - target must not exist
+ * - target must reside in the same directory as source
+ * - union mounts cannot be renamed
+ * - "/" cannot be renamed
+ *
+ * XXX Handle this in VFS after a continued lookup (if we missed
+ * in the cache to start off)
+ *
+ * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
+ * we'll skip past here. The file system is responsible for
+ * checking that @tvp is not a descendent of @fvp and vice versa
+ * so it should always return EINVAL if either @tvp or @fvp is the
+ * root of a volume.
+ */
+ if ((fvp->v_flag & VROOT) &&
+ (fvp->v_type == VDIR) &&
+ (tvp == NULL) &&
+ (fvp->v_mountedhere == NULL) &&
+ (fdvp == tdvp) &&
+ ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
+ (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
+ vnode_t coveredvp;
+
+ /* switch fvp to the covered vnode */
+ coveredvp = fvp->v_mount->mnt_vnodecovered;
+ if ( (vnode_getwithref(coveredvp)) ) {
+ error = ENOENT;
+ goto out1;
+ }
+ vnode_put(fvp);
+
+ fvp = coveredvp;
+ mntrename = TRUE;
+ }
+ /*
+ * Check for cross-device rename.
+ */
+ if ((fvp->v_mount != tdvp->v_mount) ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ goto out1;
+ }
+
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number) then there is nothing to do...
+ * EXCEPT if the underlying file system supports case
+ * insensitivity and is case preserving. In this case
+ * the file system needs to handle the special case of
+ * getting the same vnode as target (fvp) and source (tvp).
+ *
+ * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
+ * and _PC_CASE_PRESERVING can have this exception, and they need to
+ * handle the special case of getting the same vnode as target and
+ * source. NOTE: Then the target is unlocked going into vnop_rename,
+ * so not to cause locking problems. There is a single reference on tvp.
+ *
+ * NOTE - that fvp == tvp also occurs if they are hard linked and
+ * that correct behaviour then is just to return success without doing
+ * anything.
+ *
+ * XXX filesystem should take care of this itself, perhaps...
+ */
+ if (fvp == tvp && fdvp == tdvp) {
+ if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
+ !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
+ fromnd->ni_cnd.cn_namelen)) {
+ goto out1;
+ }
+ }
+
+ if (holding_mntlock && fvp->v_mount != locked_mp) {
+ /*
+ * we're holding a reference and lock
+ * on locked_mp, but it no longer matches
+ * what we want to do... so drop our hold
+ */
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (tdvp != fdvp && fvp->v_type == VDIR) {
+ /*
+ * serialize renames that re-shape
+ * the tree... if holding_mntlock is
+ * set, then we're ready to go...
+ * otherwise we
+ * first need to drop the iocounts
+ * we picked up, second take the
+ * lock to serialize the access,
+ * then finally start the lookup
+ * process over with the lock held
+ */
+ if (!holding_mntlock) {
+ /*
+ * need to grab a reference on
+ * the mount point before we
+ * drop all the iocounts... once
+ * the iocounts are gone, the mount
+ * could follow
+ */
+ locked_mp = fvp->v_mount;
+ mount_ref(locked_mp, 0);
+
+ /*
+ * nameidone has to happen before we vnode_put(tvp)
+ * since it may need to release the fs_nodelock on the tvp
+ */
+ nameidone(tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+
+ /*
+ * nameidone has to happen before we vnode_put(fdvp)
+ * since it may need to release the fs_nodelock on the fvp
+ */
+ nameidone(fromnd);
+
+ vnode_put(fvp);
+ vnode_put(fdvp);
+
+ mount_lock_renames(locked_mp);
+ holding_mntlock = 1;
+
+ goto retry;
+ }
+ } else {
+ /*
+ * when we dropped the iocounts to take
+ * the lock, we allowed the identity of
+ * the various vnodes to change... if they did,
+ * we may no longer be dealing with a rename
+ * that reshapes the tree... once we're holding
+ * the iocounts, the vnodes can't change type
+ * so we're free to drop the lock at this point
+ * and continue on
+ */
+ if (holding_mntlock) {
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ }
+
+ // save these off so we can later verify that fvp is the same
+ oname = fvp->v_name;
+ oparent = fvp->v_parent;
+
+skipped_lookup:
+#if CONFIG_FSE
+ need_event = need_fsevent(FSE_RENAME, fdvp);
+ if (need_event) {
+ if (fvp) {
+ get_fse_info(fvp, &from_finfo, ctx);
+ } else {
+ error = vfs_get_notify_attributes(&__rename_data->fv_attr);
+ if (error) {
+ goto out1;
+ }
+
+ fvap = &__rename_data->fv_attr;
+ }
+
+ if (tvp) {
+ get_fse_info(tvp, &to_finfo, ctx);
+ } else if (batched) {
+ error = vfs_get_notify_attributes(&__rename_data->tv_attr);
+ if (error) {
+ goto out1;
+ }
+
+ tvap = &__rename_data->tv_attr;
+ }
+ }
+#else
+ need_event = 0;
+#endif /* CONFIG_FSE */
+
+ if (need_event || kauth_authorize_fileop_has_listeners()) {
+ if (from_name == NULL) {
+ GET_PATH(from_name);
+ if (from_name == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
+
+ if (to_name == NULL) {
+ GET_PATH(to_name);
+ if (to_name == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
+ }
+ error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
+ tdvp, &tvp, &tond->ni_cnd, tvap,
+ flags, ctx);
+
+ if (holding_mntlock) {
+ /*
+ * we can drop our serialization
+ * lock now
+ */
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (error) {
+ if (error == EKEEPLOOKING) {
+ if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
+ }
+ }
+
+ fromnd->ni_vp = fvp;
+ tond->ni_vp = tvp;
+
+ goto continue_lookup;
+ }
+
+ /*
+ * We may encounter a race in the VNOP where the destination didn't
+ * exist when we did the namei, but it does by the time we go and
+ * try to create the entry. In this case, we should re-drive this rename
+ * call from the top again. Currently, only HFS bubbles out ERECYCLE,
+ * but other filesystems susceptible to this race could return it, too.
+ */
+ if (error == ERECYCLE) {
+ do_retry = 1;
+ }
+
+ /*
+ * For compound VNOPs, the authorization callback may return
+ * ENOENT in case of racing hardlink lookups hitting the name
+ * cache, redrive the lookup.
+ */
+ if (batched && error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ do_retry = 1;
+ retry_count += 1;
+ }
+ }
+
+ goto out1;
+ }
+
+ /* call out to allow 3rd party notification of rename.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_RENAME,
+ (uintptr_t)from_name, (uintptr_t)to_name);
+ if (flags & VFS_RENAME_SWAP) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_RENAME,
+ (uintptr_t)to_name, (uintptr_t)from_name);
+ }
+
+#if CONFIG_FSE
+ if (from_name != NULL && to_name != NULL) {
+ if (from_truncated || to_truncated) {
+ // set it here since only the from_finfo gets reported up to user space
+ from_finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+
+ if (tvap && tvp) {
+ vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
+ }
+ if (fvap) {
+ vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
+ }
+
+ if (tvp) {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_DONE);
+ if (flags & VFS_RENAME_SWAP) {
+ /*
+ * Strictly speaking, swap is the equivalent of
+ * *three* renames. FSEvents clients should only take
+ * the events as a hint, so we only bother reporting
+ * two.
+ */
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_DONE);
+ }
+ } else {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_DONE);
+ }
+ }
+#endif /* CONFIG_FSE */
+
+ /*
+ * update filesystem's mount point data
+ */
+ if (mntrename) {
+ char *cp, *pathend, *mpname;
+ char * tobuf;
+ struct mount *mp;
+ int maxlen;
+ size_t len = 0;
+
+ mp = fvp->v_mountedhere;
+
+ if (vfs_busy(mp, LK_NOWAIT)) {
+ error = EBUSY;
+ goto out1;
+ }
+ MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+
+ if (UIO_SEG_IS_USER_SPACE(segflg))
+ error = copyinstr(to, tobuf, MAXPATHLEN, &len);
+ else
+ error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
+ if (!error) {
+ /* find current mount point prefix */
+ pathend = &mp->mnt_vfsstat.f_mntonname[0];
+ for (cp = pathend; *cp != '\0'; ++cp) {
+ if (*cp == '/')
+ pathend = cp + 1;
+ }
+ /* find last component of target name */
+ for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
+ if (*cp == '/')
+ mpname = cp + 1;
+ }
+ /* append name to prefix */
+ maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
+ bzero(pathend, maxlen);
+ strlcpy(pathend, mpname, maxlen);
+ }
+ FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
+
+ vfs_unbusy(mp);
+ }
+ /*
+ * fix up name & parent pointers. note that we first
+ * check that fvp has the same name/parent pointers it
+ * had before the rename call... this is a 'weak' check
+ * at best...
+ *
+ * XXX oparent and oname may not be set in the compound vnop case
+ */
+ if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
+ int update_flags;
+
+ update_flags = VNODE_UPDATE_NAME;
+
+ if (fdvp != tdvp)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
+ }
+out1:
+ if (to_name != NULL) {
+ RELEASE_PATH(to_name);
+ to_name = NULL;
+ }
+ if (from_name != NULL) {
+ RELEASE_PATH(from_name);
+ from_name = NULL;
+ }
+ if (holding_mntlock) {
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (tdvp) {
+ /*
+ * nameidone has to happen before we vnode_put(tdvp)
+ * since it may need to release the fs_nodelock on the tdvp
+ */
+ nameidone(tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ }
+ if (fdvp) {
+ /*
+ * nameidone has to happen before we vnode_put(fdvp)
+ * since it may need to release the fs_nodelock on the fdvp
+ */
+ nameidone(fromnd);
+
+ if (fvp)
+ vnode_put(fvp);
+ vnode_put(fdvp);
+ }
+
+ /*
+ * If things changed after we did the namei, then we will re-drive
+ * this rename call from the top.
+ */
+ if (do_retry) {
+ do_retry = 0;
+ goto retry;
+ }
+
+ FREE(__rename_data, M_TEMP);
+ return (error);
+}
+
+/*
+ * rename: Rename uap->from to uap->to.
+ *
+ * Both user-space paths are handed to renameat_internal() with AT_FDCWD as
+ * the starting directory for each, and with no rename flags.
+ *
+ * Returns: renameat_internal:???
+ */
+int
+rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+
+ error = renameat_internal(ctx, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
+ UIO_USERSPACE, 0);
+
+ return (error);
+}
+
+/*
+ * renameatx_np: Rename uap->from (relative to uap->fromfd) to uap->to
+ * (relative to uap->tofd), forwarding the caller supplied uap->flags
+ * unchanged to renameat_internal().
+ *
+ * Returns: renameat_internal:???
+ */
+int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+
+ error = renameat_internal(ctx, uap->fromfd, uap->from, uap->tofd, uap->to,
+ UIO_USERSPACE, uap->flags);
+
+ return error;
+}
+
+/*
+ * renameat: Rename uap->from (relative to uap->fromfd) to uap->to
+ * (relative to uap->tofd).  No rename flags are passed.
+ *
+ * Returns: renameat_internal:???
+ */
+int
+renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+
+ error = renameat_internal(ctx, uap->fromfd, uap->from, uap->tofd, uap->to,
+ UIO_USERSPACE, 0);
+
+ return (error);
+}
+
+/*
+ * mkdir1at: Make a directory file.
+ *
+ * Parameters: ctx Context used for the lookup, authorization
+ * and create calls
+ * path Path of the directory to create
+ * vap Attributes for the new directory; va_type
+ * is set to VDIR here
+ * fd Descriptor handed to nameiat() for the lookup
+ * segflg Address space that 'path' lives in
+ *
+ * Returns: 0 Success
+ * EEXIST The target already exists
+ * namei:???
+ * vnode_authorize:???
+ * vn_create:???
+ */
+/* ARGSUSED */
+static int
+mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
+ enum uio_seg segflg)
+{
+ vnode_t vp, dvp;
+ int error;
+ int update_flags = 0;
+ int batched; /* assigned below but not read again in this function */
+ struct nameidata nd;
+
+ AUDIT_ARG(mode, vap->va_mode);
+ NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
+ path, ctx);
+ nd.ni_cnd.cn_flags |= WILLBEDIR;
+ nd.ni_flag = NAMEI_COMPOUNDMKDIR;
+
+ /* re-entered when vn_create() asks for the lookup to be continued */
+continue_lookup:
+ error = nameiat(&nd, fd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ /* the lookup found something at the target name */
+ if (vp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ batched = vnode_compound_mkdir_available(dvp);
+
+ VATTR_SET(vap, va_type, VDIR);
+
+ /*
+ * XXX
+ * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
+ * only get EXISTS or EISDIR for existing path components, and not that it could see
+ * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
+ * it will fail in a spurious manner. Need to figure out if this is valid behavior.
+ */
+ if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
+ if (error == EACCES || error == EPERM) {
+ int error2;
+
+ /* drop the state of the first lookup before starting another */
+ nameidone(&nd);
+ vnode_put(dvp);
+ dvp = NULLVP;
+
+ /*
+ * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
+ * rather than EACCESS if the target exists.
+ */
+ NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
+ path, ctx);
+ error2 = nameiat(&nd, fd);
+ if (error2) {
+ /* target not found: report the original authorization error */
+ goto out;
+ } else {
+ /* target exists: hand vp to 'out' so its iocount is dropped */
+ vp = nd.ni_vp;
+ error = EEXIST;
+ goto out;
+ }
+ }
+
+ goto out;
+ }
+
+ /*
+ * make the directory
+ */
+ if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
+ if (error == EKEEPLOOKING) {
+ /* resume the lookup from the vnode vn_create() handed back */
+ nd.ni_vp = vp;
+ goto continue_lookup;
+ }
+
+ goto out;
+ }
+
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+ add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+#endif
+
+out:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ if (dvp)
+ vnode_put(dvp);
+
+ return (error);
+}
+
+/*
+ * mkdir_extended: Create a directory; with extended security (ACL).
+ *
+ * Parameters: p Process requesting to create the directory
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of directory to create
+ * uap->mode Access permissions to set (masked by the
+ * process file creation mask)
+ * uap->uid Recorded for audit only
+ * uap->gid Recorded for audit only
+ * uap->xsecurity ACL to set, or USER_ADDR_NULL for none
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ kauth_filesec_t fsec = NULL;
+ int error;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ /* pull in the caller's filesec, if one was supplied */
+ if (uap->xsecurity != USER_ADDR_NULL) {
+ error = kauth_copyinfilesec(uap->xsecurity, &fsec);
+ if (error != 0) {
+ return error;
+ }
+ }
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+ if (fsec != NULL) {
+ VATTR_SET(&va, va_acl, &fsec->fsec_acl);
+ }
+
+ error = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
+ UIO_USERSPACE);
+
+ /* the copied-in filesec is ours to free, whatever mkdir1at() said */
+ if (fsec != NULL) {
+ kauth_filesec_free(fsec);
+ }
+ return error;
+}
+
+/*
+ * mkdir: Create the directory uap->path, starting the lookup at AT_FDCWD.
+ * The requested mode is limited to ACCESSPERMS and masked by the process
+ * file creation mask.
+ *
+ * Returns: mkdir1at:???
+ */
+int
+mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ int error;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+
+ error = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
+ UIO_USERSPACE);
+
+ return (error);
+}
+
+/*
+ * mkdirat: Create the directory uap->path, starting the lookup at uap->fd.
+ * The requested mode is limited to ACCESSPERMS and masked by the process
+ * file creation mask.
+ *
+ * Returns: mkdir1at:???
+ */
+int
+mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ int error;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+
+ error = mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
+ UIO_USERSPACE);
+
+ return(error);
+}
+
+/*
+ * rmdirat_internal: Remove the directory named by 'dirpath'.
+ *
+ * Parameters: ctx Context used for lookup, authorization and
+ * the remove itself
+ * fd Descriptor handed to nameiat() for the lookup
+ * dirpath Path of the directory to remove
+ * segflg Address space that 'dirpath' lives in
+ *
+ * Returns: 0 Success
+ * EBUSY Target is the root of a mounted filesystem
+ * EPERM Target is a swap vnode and ctx is not the
+ * kernel context (non DEVELOPMENT/DEBUG only)
+ * ENOMEM No path buffer for the notification
+ * namei:???
+ * vn_authorize_rmdir:???
+ * vn_rmdir:???
+ *
+ * Notes: The whole operation is retried (after a short sleep) whenever
+ * restart_flag is set; ENOENT driven retries are bounded by
+ * MAX_AUTHORIZE_ENOENT_RETRIES.
+ */
+static int
+rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
+ enum uio_seg segflg)
+{
+ vnode_t vp, dvp;
+ int error;
+ struct nameidata nd;
+ char *path = NULL;
+ int len=0;
+ int has_listeners = 0;
+ int need_event = 0;
+ int truncated = 0;
+#if CONFIG_FSE
+ struct vnode_attr va;
+#endif /* CONFIG_FSE */
+ struct vnode_attr *vap = NULL;
+ int restart_count = 0;
+ int batched;
+
+ int restart_flag;
+
+ /*
+ * This loop exists to restart rmdir in the unlikely case that two
+ * processes are simultaneously trying to remove the same directory
+ * containing orphaned appleDouble files.
+ */
+ do {
+ NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
+ segflg, dirpath, ctx);
+ nd.ni_flag = NAMEI_COMPOUNDRMDIR;
+continue_lookup:
+ restart_flag = 0;
+ vap = NULL;
+
+ error = nameiat(&nd, fd);
+ if (error) {
+ /*
+ * 'path' may still be allocated if we got back here
+ * through continue_lookup (EKEEPLOOKING from vn_rmdir
+ * after the path buffer was obtained); this return
+ * bypasses 'out', so release it here to avoid a leak.
+ */
+ if (path != NULL) {
+ RELEASE_PATH(path);
+ path = NULL;
+ }
+ return (error);
+ }
+
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp) {
+ batched = vnode_compound_rmdir_available(vp);
+
+ if (vp->v_flag & VROOT) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ error = EBUSY;
+ goto out;
+ }
+
+#if DEVELOPMENT || DEBUG
+ /*
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+ /*
+ * Removed a check here; we used to abort if vp's vid
+ * was not the same as what we'd seen the last time around.
+ * I do not think that check was valid, because if we retry
+ * and all dirents are gone, the directory could legitimately
+ * be recycled but still be present in a situation where we would
+ * have had permission to delete. Therefore, we won't make
+ * an effort to preserve that check now that we may not have a
+ * vp here.
+ */
+
+ if (!batched) {
+ error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
+ if (error) {
+ if (error == ENOENT) {
+ assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ restart_flag = 1;
+ restart_count += 1;
+ }
+ }
+ goto out;
+ }
+ }
+ } else {
+ /* no vp from the lookup: only legal for a compound rmdir */
+ batched = 1;
+
+ if (!vnode_compound_rmdir_available(dvp)) {
+ panic("No error, but no compound rmdir?");
+ }
+ }
+
+#if CONFIG_FSE
+ fse_info finfo;
+
+ need_event = need_fsevent(FSE_DELETE, dvp);
+ if (need_event) {
+ if (!batched) {
+ get_fse_info(vp, &finfo, ctx);
+ } else {
+ /* no vnode yet; have vn_rmdir() fill in 'va' for us */
+ error = vfs_get_notify_attributes(&va);
+ if (error) {
+ goto out;
+ }
+
+ vap = &va;
+ }
+ }
+#endif
+ has_listeners = kauth_authorize_fileop_has_listeners();
+ if (need_event || has_listeners) {
+ /* the path buffer is reused across continue_lookup passes */
+ if (path == NULL) {
+ GET_PATH(path);
+ if (path == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ }
+
+ len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
+#if CONFIG_FSE
+ if (truncated) {
+ finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+#endif
+ }
+
+ error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+ nd.ni_vp = vp;
+ if (vp == NULLVP) {
+ /* Couldn't find a vnode */
+ goto out;
+ }
+
+ if (error == EKEEPLOOKING) {
+ goto continue_lookup;
+ } else if (batched && error == ENOENT) {
+ assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * For compound VNOPs, the authorization callback
+ * may return ENOENT in case of racing hard link lookups
+ * redrive the lookup.
+ */
+ restart_flag = 1;
+ restart_count += 1;
+ goto out;
+ }
+ }
+#if CONFIG_APPLEDOUBLE
+ /*
+ * Special case to remove orphaned AppleDouble
+ * files. I don't like putting this in the kernel,
+ * but carbon does not like putting this in carbon either,
+ * so here we are.
+ */
+ if (error == ENOTEMPTY) {
+ error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
+ if (error == EBUSY) {
+ goto out;
+ }
+
+
+ /*
+ * Assuming everything went well, we will try the RMDIR again
+ */
+ if (!error)
+ error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+ }
+#endif /* CONFIG_APPLEDOUBLE */
+ /*
+ * Call out to allow 3rd party notification of delete.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ if (!error) {
+ if (has_listeners) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_DELETE,
+ (uintptr_t)vp,
+ (uintptr_t)path);
+ }
+
+ if (vp->v_flag & VISHARDLINK) {
+ // see the comment in unlink1() about why we update
+ // the parent of a hard link when it is removed
+ vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+ }
+
+#if CONFIG_FSE
+ if (need_event) {
+ if (vap) {
+ vnode_get_fse_info_from_vap(vp, &finfo, vap);
+ }
+ add_fsevent(FSE_DELETE, ctx,
+ FSE_ARG_STRING, len, path,
+ FSE_ARG_FINFO, &finfo,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+
+out:
+ if (path != NULL) {
+ RELEASE_PATH(path);
+ path = NULL;
+ }
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+ vnode_put(dvp);
+
+ if (vp)
+ vnode_put(vp);
+
+ if (restart_flag == 0) {
+ /* done: wake one thread sleeping on vp in the tsleep() below */
+ wakeup_one((caddr_t)vp);
+ return (error);
+ }
+ /* back off briefly before redriving the whole operation */
+ tsleep(vp, PVFS, "rm AD", 1);
+
+ } while (restart_flag != 0);
+
+ return (error);
+
+}
+
+/*
+ * rmdir: Remove a directory file.
+ *
+ * The user-space path uap->path is handed to rmdirat_internal() with
+ * AT_FDCWD as the starting directory.
+ *
+ * Returns: rmdirat_internal:???
+ */
+/* ARGSUSED */
+int
+rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+
+ error = rmdirat_internal(ctx, AT_FDCWD, CAST_USER_ADDR_T(uap->path),
+ UIO_USERSPACE);
+
+ return (error);
+}
+
+/*
+ * Get direntry length padded to 8 byte alignment.
+ *
+ * Starts from sizeof(struct direntry), subtracts (MAXPATHLEN-1), adds the
+ * actual name length, and rounds the result up to a multiple of 8.
+ * NOTE(review): presumably this swaps a MAXPATHLEN-sized d_name array for
+ * namlen bytes plus a terminating NUL -- confirm against struct direntry.
+ */
+#define DIRENT64_LEN(namlen) \
+ ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
+
+/*
+ * vnode_readdir64: Read directory entries from 'vp' into 'uio' in the
+ * extended (struct direntry) format.
+ *
+ * If the filesystem advertises VFC_VFSREADDIR_EXTENDED and the mount does
+ * not set MNTK_DENY_READDIREXT, the request goes straight to VNOP_READDIR.
+ * Otherwise entries are read in struct dirent format into a temporary
+ * kernel buffer (with flags 0), converted one at a time to struct direntry
+ * (d_seekoff is always 0), and moved to 'uio'.
+ *
+ * Returns: 0 Success
+ * ENOMEM Could not allocate the temporary buffer
+ * EIO The filesystem returned a malformed dirent
+ * VNOP_READDIR:???
+ * uiomove:???
+ */
+errno_t
+vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
+ int *numdirent, vfs_context_t ctxp)
+{
+ /* Check if fs natively supports VNODE_READDIR_EXTENDED */
+ if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
+ ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
+ return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
+ } else {
+ size_t bufsize;
+ void * bufptr;
+ char * bufend;
+ uio_t auio;
+ struct direntry *entry64;
+ struct dirent *dep;
+ int bytesread;
+ int error;
+
+ /*
+ * Our kernel buffer needs to be smaller since re-packing
+ * will expand each dirent. The worst case (when the name
+ * length is 3) corresponds to a struct direntry size of 32
+ * bytes (8-byte aligned) and a struct dirent size of 12 bytes
+ * (4-byte aligned). So having a buffer that is 3/8 the size
+ * will prevent us from reading more than we can pack.
+ *
+ * Since this buffer is wired memory, we will limit the
+ * buffer size to a maximum of 32K. We would really like to
+ * use 32K in the MIN(), but we use magic number 87371 to
+ * prevent uio_resid() * 3 / 8 from overflowing.
+ */
+ bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
+ MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
+ if (bufptr == NULL) {
+ return ENOMEM;
+ }
+
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, (uintptr_t)bufptr, bufsize);
+ auio->uio_offset = uio->uio_offset;
+
+ error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
+
+ dep = (struct dirent *)bufptr;
+ bytesread = bufsize - uio_resid(auio);
+ bufend = (char *)bufptr + bytesread;
+
+ MALLOC(entry64, struct direntry *, sizeof(struct direntry),
+ M_TEMP, M_WAITOK);
+ /*
+ * Convert all the entries and copy them out to user's buffer.
+ */
+ while (error == 0 && (char *)dep < bufend) {
+ /* bytes in a struct dirent that precede the name */
+ size_t hdrlen = (size_t)((char *)&dep->d_name[0] - (char *)dep);
+ size_t remaining = (size_t)(bufend - (char *)dep);
+ size_t enbufsize;
+
+ /*
+ * Don't trust the record the filesystem handed us:
+ * the fixed header and the whole record must lie
+ * inside what was read, and the record must be big
+ * enough for the header plus the NUL-terminated name
+ * (which also rejects d_reclen == 0, which would
+ * otherwise loop forever).
+ */
+ if (remaining < hdrlen ||
+ remaining < dep->d_reclen ||
+ hdrlen + dep->d_namlen + 1 > dep->d_reclen) {
+ error = EIO;
+ break;
+ }
+
+ enbufsize = DIRENT64_LEN(dep->d_namlen);
+
+ bzero(entry64, enbufsize);
+ /* Convert a dirent to a dirent64. */
+ entry64->d_ino = dep->d_ino;
+ entry64->d_seekoff = 0;
+ entry64->d_reclen = enbufsize;
+ entry64->d_namlen = dep->d_namlen;
+ entry64->d_type = dep->d_type;
+ bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
+
+ /* Move to next entry. */
+ dep = (struct dirent *)((char *)dep + dep->d_reclen);
+
+ /* Copy entry64 to user's buffer. */
+ error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
+ }
+
+ /* Update the real offset using the offset we got from VNOP_READDIR. */
+ if (error == 0) {
+ uio->uio_offset = auio->uio_offset;
+ }
+ uio_free(auio);
+ FREE(bufptr, M_TEMP);
+ FREE(entry64, M_TEMP);
+ return (error);
+ }
+}
+
+/* Upper bound (128MB) applied to the caller's buffer size in getdirentries_common() */
+#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Parameters: fd Descriptor of the directory to read
+ * bufp User buffer that receives the entries
+ * bufsize Size of bufp; clamped to
+ * GETDIRENTRIES_MAXBUFSIZE
+ * bytesread (out) Number of bytes placed in bufp
+ * offset (out, may be NULL) File offset the read
+ * started from
+ * flags VNODE_READDIR_EXTENDED selects
+ * vnode_readdir64(); otherwise VNOP_READDIR
+ * is called directly
+ *
+ * Returns: 0 Success
+ * EBADF Descriptor not open for reading
+ * EINVAL Descriptor does not refer to a directory
+ * fp_getfvp:???
+ * vnode_getwithref:???
+ * VNOP_READDIR:???
+ *
+ * Notes: *bytesread and *offset are only written on success.
+ */
+static int
+getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
+ off_t *offset, int flags)
+{
+ vnode_t vp;
+ struct vfs_context context = *vfs_context_current(); /* local copy */
+ struct fileproc *fp;
+ uio_t auio;
+ int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ off_t loff;
+ int error, eofflag, numdirent;
+ char uio_buf[ UIO_SIZEOF(1) ];
+
+ error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
+ if (error) {
+ return (error);
+ }
+ /* from here on every exit goes through 'out' so file_drop(fd) runs */
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+ if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
+ bufsize = GETDIRENTRIES_MAXBUFSIZE;
+
+#if CONFIG_MACF
+ error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
+ if (error)
+ goto out;
+#endif
+ if ( (error = vnode_getwithref(vp)) ) {
+ goto out;
+ }
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ /* re-entered with a new vp when the read moves to another union layer */
+unionread:
+ if (vp->v_type != VDIR) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_readdir(&context, vp);
+ if (error != 0) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif /* MAC */
+
+ /* the uio lives in the on-stack uio_buf; remember the starting offset */
+ loff = fp->f_fglob->fg_offset;
+ auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, bufp, bufsize);
+
+ /* the file offset is updated from the uio before 'error' is examined */
+ if (flags & VNODE_READDIR_EXTENDED) {
+ error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
+ fp->f_fglob->fg_offset = uio_offset(auio);
+ } else {
+ error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
+ fp->f_fglob->fg_offset = uio_offset(auio);
+ }
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+
+ /* nothing was read: see whether a union layer should be read instead */
+ if ((user_ssize_t)bufsize == uio_resid(auio)){
+ if (union_dircheckp) {
+ error = union_dircheckp(&vp, fp, &context);
+ if (error == -1)
+ goto unionread;
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+ }
+
+ if ((vp->v_mount->mnt_flag & MNT_UNION)) {
+ struct vnode *tvp = vp;
+ if (lookup_traverse_union(tvp, &vp, &context) == 0) {
+ /*
+ * Switch the open file over to the new vnode:
+ * reference it, reset the offset, then drop the
+ * reference and iocount held on the old one.
+ */
+ vnode_ref(vp);
+ fp->f_fglob->fg_data = (caddr_t) vp;
+ fp->f_fglob->fg_offset = 0;
+ vnode_rele(tvp);
+ vnode_put(tvp);
+ goto unionread;
+ }
+ /* traversal failed: keep using the original vnode */
+ vp = tvp;
+ }
+ }
+
+ vnode_put(vp);
+ if (offset) {
+ *offset = loff;
+ }
+
+ *bytesread = bufsize - uio_resid(auio);
+out:
+ file_drop(fd);
+ return (error);
+}
+
+
+/*
+ * getdirentries: Read directory entries from uap->fd into uap->buf.
+ *
+ * On success the number of bytes read is returned through retval and the
+ * starting offset is copied out to uap->basep, as a 64-bit or 32-bit long
+ * depending on whether the process is 64-bit.
+ *
+ * Returns: 0 Success
+ * getdirentries_common:???
+ * copyout:???
+ */
+int
+getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
+{
+ ssize_t nread;
+ off_t startoff;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+
+ error = getdirentries_common(uap->fd, uap->buf, uap->count, &nread, &startoff, 0);
+ if (error != 0) {
+ return (error);
+ }
+
+ *retval = nread;
+
+ /* basep is a user 'long', so its width follows the process ABI */
+ if (proc_is64bit(p)) {
+ user64_long_t base64 = (user64_long_t)startoff;
+
+ return (copyout((caddr_t)&base64, uap->basep, sizeof(user64_long_t)));
+ } else {
+ user32_long_t base32 = (user32_long_t)startoff;
+
+ return (copyout((caddr_t)&base32, uap->basep, sizeof(user32_long_t)));
+ }
+}
+
+/*
+ * getdirentries64: Read directory entries from uap->fd into uap->buf using
+ * the extended (VNODE_READDIR_EXTENDED) entry format.
+ *
+ * On success the number of bytes read is returned through retval and the
+ * starting offset is copied out to uap->position as an off_t.
+ *
+ * Returns: 0 Success
+ * getdirentries_common:???
+ * copyout:???
+ */
+int
+getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
+{
+ ssize_t nread;
+ off_t startoff;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+
+ error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &nread, &startoff, VNODE_READDIR_EXTENDED);
+ if (error != 0) {
+ return (error);
+ }
+
+ *retval = nread;
+
+ return (copyout((caddr_t)&startoff, uap->position, sizeof(off_t)));
+}
+
+
+/*
+ * umask1: Set the mode mask for creation of filesystem nodes.
+ *
+ * The previous mask is returned through retval and the new one (limited to
+ * ALLPERMS) is stored, both under the process fd lock.  'fsec' is accepted
+ * but unused.
+ * XXX implement xsecurity
+ *
+ * Returns: 0 Always
+ */
+#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
+static int
+umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
+{
+ AUDIT_ARG(mask, newmask);
+
+ proc_fdlock(p);
+ *retval = p->p_fd->fd_cmask;
+ p->p_fd->fd_cmask = newmask & ALLPERMS;
+ proc_fdunlock(p);
+
+ return (0);
+}
+
+/*
+ * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
+ *
+ * Parameters: p Process requesting to set the umask
+ * uap User argument descriptor (see below)
+ * retval umask of the process (parameter p)
+ *
+ * Indirect: uap->newmask umask to set
+ * uap->xsecurity ACL to set, or USER_ADDR_NULL for none
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
+{
+ kauth_filesec_t fsec = KAUTH_FILESEC_NONE;
+ int error;
+
+ /* only copy in a filesec when the caller actually supplied one */
+ if (uap->xsecurity != USER_ADDR_NULL) {
+ error = kauth_copyinfilesec(uap->xsecurity, &fsec);
+ if (error != 0) {
+ return error;
+ }
+ }
+
+ error = umask1(p, uap->newmask, fsec, retval);
+
+ if (fsec != KAUTH_FILESEC_NONE) {
+ kauth_filesec_free(fsec);
+ }
+ return error;
+}
+
+/*
+ * umask: Set the process file creation mask to uap->newmask, returning the
+ * previous mask through retval.  UMASK_NOXSECURITY is passed as the
+ * filesec argument of umask1().
+ */
+int
+umask(proc_t p, struct umask_args *uap, int32_t *retval)
+{
+ int error;
+
+ error = umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
+
+ return(error);
+}
+
+/*
+ * revoke: Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ *
+ * Parameters: p Calling process (p_acflag is passed to suser)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of the device node to revoke
+ *
+ * Returns: 0 Success
+ * ENOTSUP Target is not a character or block device
+ * EBUSY Target is a block device that is mounted on
+ * namei:???
+ * vnode_getattr:???
+ * suser:???
+ *
+ * Notes: The return value of VNOP_REVOKE is not examined.
+ */
+/* ARGSUSED */
+int
+revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ /* vp is released by the vnode_put() at 'out' on every path below */
+ if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
+ error = ENOTSUP;
+ goto out;
+ }
+
+ if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
+ error = EBUSY;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_revoke(ctx, vp);
+ if (error)
+ goto out;
+#endif
+
+ /* the caller must own the node (va_uid) or pass the suser() check */
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ if ((error = vnode_getattr(vp, &va, ctx)))
+ goto out;
+ if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
+ (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
+ goto out;
+ /* only revoke when the vnode is in use or aliased */
+ if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
+ VNOP_REVOKE(vp, REVOKEALL, ctx);
+out:
+ vnode_put(vp);
+ return (error);
+}
+
+
+/*
+ * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
+ * The following system calls are designed to support features
+ * which are specific to the HFS & HFS Plus volume formats
+ */
+
+
+/*
+ * getdirentriesattr: Obtain attribute information on objects in a
+ * directory while enumerating the directory.
+ *
+ * Parameters: p Calling process
+ * uap User argument descriptor (see below)
+ * retval (out) eofflag reported by VNOP_READDIRATTR
+ *
+ * Indirect: uap->fd Descriptor of the directory to enumerate
+ * uap->alist Attribute list to retrieve (copied in)
+ * uap->buffer User buffer receiving the results
+ * uap->buffersize Size of uap->buffer
+ * uap->count In: copied in and passed to VNOP_READDIRATTR
+ * Out: count handed back by VNOP_READDIRATTR
+ * uap->basep Out: file offset the read started from
+ * uap->newstate Out: state value from VNOP_READDIRATTR
+ * uap->options Options (only the low 32 bits are used)
+ *
+ * Returns: 0 Success
+ * EBADF Descriptor not open for reading
+ * EINVAL Descriptor does not refer to a directory
+ * copyin:???
+ * copyout:???
+ * vnode_authorize:???
+ * VNOP_READDIRATTR:???
+ */
+/* ARGSUSED */
+int
+getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
+{
+ vnode_t vp;
+ struct fileproc *fp;
+ uio_t auio = NULL;
+ int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ uint32_t count, savecount;
+ uint32_t newstate;
+ int error, eofflag;
+ /* NOTE(review): 32 bits wide, while getdirentries_common keeps fg_offset in an off_t -- confirm truncation is intended */
+ uint32_t loff;
+ struct attrlist attributelist;
+ vfs_context_t ctx = vfs_context_current();
+ int fd = uap->fd;
+ char uio_buf[ UIO_SIZEOF(1) ];
+ kauth_action_t action;
+
+ AUDIT_ARG(fd, fd);
+
+ /* Get the attributes into kernel space */
+ if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
+ return(error);
+ }
+ if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
+ return(error);
+ }
+ /* remember the requested count so it can be restored on a union retry */
+ savecount = count;
+ if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
+ return (error);
+ }
+ /* from here on every exit goes through 'out' so file_drop(fd) runs */
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+
+#if CONFIG_MACF
+ error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+ fp->f_fglob);
+ if (error)
+ goto out;
+#endif
+
+
+ if ( (error = vnode_getwithref(vp)) )
+ goto out;
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ /* re-entered with a new vp when the read moves to another union layer */
+unionread:
+ if (vp->v_type != VDIR) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_readdir(ctx, vp);
+ if (error != 0) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif /* MAC */
+
+ /* set up the uio structure which will contain the users return buffer */
+ loff = fp->f_fglob->fg_offset;
+ auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, uap->buffer, uap->buffersize);
+
+ /*
+ * If the only item requested is file names, we can let that past with
+ * just LIST_DIRECTORY. If they want any other attributes, that means
+ * they need SEARCH as well.
+ */
+ action = KAUTH_VNODE_LIST_DIRECTORY;
+ if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
+ attributelist.fileattr || attributelist.dirattr)
+ action |= KAUTH_VNODE_SEARCH;
+
+ if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
+
+ /* Believe it or not, uap->options only has 32-bits of valid
+ * info, so truncate before extending again */
+
+ error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
+ (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
+ }
+
+ /* covers both an authorization failure and a VNOP_READDIRATTR failure */
+ if (error) {
+ (void) vnode_put(vp);
+ goto out;
+ }
+
+ /*
+ * If we've got the last entry of a directory in a union mount
+ * then reset the eofflag and pretend there's still more to come.
+ * The next call will again set eofflag and the buffer will be empty,
+ * so traverse to the underlying directory and do the directory
+ * read there.
+ */
+ if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
+ if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
+ eofflag = 0;
+ } else { // Empty buffer
+ struct vnode *tvp = vp;
+ if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
+ /*
+ * Switch the open file over to the new vnode:
+ * reference it, reset the offset and count, then
+ * drop the reference and iocount on the old one.
+ */
+ vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
+ fp->f_fglob->fg_data = (caddr_t) vp;
+ fp->f_fglob->fg_offset = 0; // reset index for new dir
+ count = savecount;
+ vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
+ vnode_put(tvp);
+ goto unionread;
+ }
+ /* traversal failed: keep using the original vnode */
+ vp = tvp;
+ }
+ }
+
+ (void)vnode_put(vp);
+
+ /* error was already checked above and has not been assigned since */
+ if (error)
+ goto out;
+ fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
+
+ if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
+ goto out;
+ if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
+ goto out;
+ if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
+ goto out;
+
+ *retval = eofflag; /* similar to getdirentries */
+ error = 0;
+out:
+ file_drop(fd);
+ return (error); /* return error earlier, an retval of 0 or 1 now */
+
+} /* end of getdirentriesattr system call */
+
+/*
+ * exchangedata: Exchange data between two files
+ *
+ * Parameters: p (unused)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path1 Path of the first file
+ * uap->path2 Path of the second file
+ * uap->options FSOPT_NOFOLLOW: do not follow a trailing
+ * symlink in either path
+ *
+ * Returns: 0 Success
+ * EINVAL Both paths name the same vnode, or either
+ * one is not a regular file
+ * EXDEV The files are on different mounts
+ * ENOMEM No path buffers for the notification
+ * namei:???
+ * vnode_authorize:???
+ * VNOP_EXCHANGE:???
+ */
+
+/* ARGSUSED */
+int
+exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
+{
+
+ struct nameidata fnd, snd;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t fvp;
+ vnode_t svp;
+ int error;
+ u_int32_t nameiflags;
+ char *fpath = NULL;
+ char *spath = NULL;
+ int flen=0, slen=0;
+ int from_truncated=0, to_truncated=0;
+#if CONFIG_FSE
+ fse_info f_finfo, s_finfo;
+#endif
+
+ nameiflags = 0;
+ if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+
+ NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path1, ctx);
+
+ error = namei(&fnd);
+ if (error)
+ goto out2;
+
+ nameidone(&fnd);
+ fvp = fnd.ni_vp;
+
+ NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
+ UIO_USERSPACE, uap->path2, ctx);
+
+ error = namei(&snd);
+ if (error) {
+ /* only fvp is held at this point */
+ vnode_put(fvp);
+ goto out2;
+ }
+ nameidone(&snd);
+ svp = snd.ni_vp;
+
+ /* both vnodes are held from here on; 'out' releases them */
+
+ /*
+ * if the files are the same, return an inval error
+ */
+ if (svp == fvp) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * if the files are on different volumes, return an error
+ */
+ if (svp->v_mount != fvp->v_mount) {
+ error = EXDEV;
+ goto out;
+ }
+
+ /* If they're not files, return an error */
+ if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_exchangedata(ctx,
+ fvp, svp);
+ if (error)
+ goto out;
+#endif
+ /* the caller needs read and write access to the data of both files */
+ if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
+ ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
+ goto out;
+
+ /* paths are only built when an fsevent or a fileop listener wants them */
+ if (
+#if CONFIG_FSE
+ need_fsevent(FSE_EXCHANGE, fvp) ||
+#endif
+ kauth_authorize_fileop_has_listeners()) {
+ GET_PATH(fpath);
+ GET_PATH(spath);
+ if (fpath == NULL || spath == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
+ slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
+
+#if CONFIG_FSE
+ get_fse_info(fvp, &f_finfo, ctx);
+ get_fse_info(svp, &s_finfo, ctx);
+ if (from_truncated || to_truncated) {
+ // set it here since only the f_finfo gets reported up to user space
+ f_finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+#endif
+ }
+ /* Ok, make the call */
+ error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
+
+ if (error == 0) {
+ const char *tmpname;
+
+ if (fpath != NULL && spath != NULL) {
+ /* call out to allow 3rd party notification of exchangedata.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
+ (uintptr_t)fpath, (uintptr_t)spath);
+ }
+ /* swap the cached name and parent of the two vnodes under the name cache lock */
+ name_cache_lock();
+
+ tmpname = fvp->v_name;
+ fvp->v_name = svp->v_name;
+ svp->v_name = tmpname;
+
+ if (fvp->v_parent != svp->v_parent) {
+ vnode_t tmp;
+
+ tmp = fvp->v_parent;
+ fvp->v_parent = svp->v_parent;
+ svp->v_parent = tmp;
+ }
+ name_cache_unlock();
+
+#if CONFIG_FSE
+ if (fpath != NULL && spath != NULL) {
+ add_fsevent(FSE_EXCHANGE, ctx,
+ FSE_ARG_STRING, flen, fpath,
+ FSE_ARG_FINFO, &f_finfo,
+ FSE_ARG_STRING, slen, spath,
+ FSE_ARG_FINFO, &s_finfo,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+
+out:
+ if (fpath != NULL)
+ RELEASE_PATH(fpath);
+ if (spath != NULL)
+ RELEASE_PATH(spath);
+ vnode_put(svp);
+ vnode_put(fvp);
+out2:
+ return (error);
+}
+
+/*
+ * freespace_mb: report the space available on the volume holding 'vp',
+ * in units of 2^20 bytes.
+ *
+ * The mount's vfsstat is refreshed first (VFS_USER_EVENT); f_bavail is
+ * then multiplied by f_bsize in 64 bits and shifted right by 20.  The
+ * result is narrowed to uint32_t on return.
+ */
+uint32_t freespace_mb(vnode_t vp);
+
+uint32_t
+freespace_mb(vnode_t vp)
+{
+	uint64_t avail_bytes;
+
+	vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
+
+	/* widen before multiplying so blocks * block size cannot wrap at 32 bits */
+	avail_bytes = (uint64_t)vp->v_mount->mnt_vfsstat.f_bavail;
+	avail_bytes *= vp->v_mount->mnt_vfsstat.f_bsize;
+
+	return (avail_bytes >> 20);
+}
+
+#if CONFIG_SEARCHFS
+
+/*
+ * searchfs: search a volume for objects matching caller-supplied criteria.
+ *
+ * Parameters:	p			Process making the request
+ *		uap			User argument descriptor (see below)
+ *		retval			(ignored)
+ *
+ * Indirect:	uap->path		Path used to locate the volume to search
+ *		uap->searchblock	User fssearchblock (32- or 64-bit layout)
+ *		uap->nummatches		User address that receives the match count
+ *		uap->scriptcode		Passed through to VNOP_SEARCHFS
+ *		uap->options		Option bits (SRCHFS_START, FSOPT_NOFOLLOW, ...)
+ *		uap->state		User searchstate; copied in, then copied back out
+ *
+ * Returns:	0			Success
+ *		EINVAL			Search parameter sizes or the ATTR_CMN_NAME
+ *					string reference failed validation
+ *		EAGAIN			Returned by the filesystem, or forced here when
+ *					another union-mount layer remains to be searched
+ *		!0			errno from copyin/copyout, namei, VFS_ROOT,
+ *					the MAC check or the filesystem
+ *
+ * Notes:	The search state and match count are copied out even when the
+ *		filesystem reports an error; the filesystem's error is what
+ *		the caller ultimately sees if those copyouts succeed.
+ */
+/* ARGSUSED */
+
+int
+searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
+{
+	vnode_t vp, tvp;
+	int i, error=0;
+	int fserror = 0;
+	struct nameidata nd;
+	struct user64_fssearchblock searchblock;
+	struct searchstate *state;
+	struct attrlist *returnattrs;
+	struct timeval timelimit;
+	void *searchparams1,*searchparams2;
+	uio_t auio = NULL;
+	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	/*
+	 * Start at zero: the value is copied out to user space at saveandexit
+	 * even when VNOP_SEARCHFS fails, and nothing visible here guarantees
+	 * that the filesystem stores to it on every error path.
+	 */
+	uint32_t nummatches = 0;
+	int mallocsize;
+	uint32_t nameiflags;
+	vfs_context_t ctx = vfs_context_current();
+	char uio_buf[ UIO_SIZEOF(1) ];
+
+	/* Start by copying in fsearchblock parameter list */
+	if (IS_64BIT_PROCESS(p)) {
+		error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
+		timelimit.tv_sec = searchblock.timelimit.tv_sec;
+		timelimit.tv_usec = searchblock.timelimit.tv_usec;
+	}
+	else {
+		struct user32_fssearchblock tmp_searchblock;
+
+		error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
+		// munge into 64-bit version
+		searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
+		searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
+		searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
+		searchblock.maxmatches = tmp_searchblock.maxmatches;
+		/*
+		 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
+		 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
+		 */
+		timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
+		timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
+		searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
+		searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
+		searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
+		searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
+		searchblock.searchattrs = tmp_searchblock.searchattrs;
+	}
+	/* the copyin result is only examined here; nothing above is used if it failed */
+	if (error)
+		return(error);
+
+	/* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
+	 */
+	if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
+	    searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
+		return(EINVAL);
+
+	/* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
+	/* It all has to go into local memory and it's not that big so we might as well put it all together. */
+	/* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
+	/* block. */
+	/* */
+	/* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
+	/* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
+	/* assumes the size is still 556 bytes it will continue to work */
+
+	mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
+		sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
+
+	MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
+
+	/* Now set up the various pointers to the correct place in our newly allocated memory */
+
+	searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
+	returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
+	state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
+
+	/* Now copy in the stuff given our local variables. */
+
+	if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
+		goto freeandexit;
+
+	if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
+		goto freeandexit;
+
+	if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
+		goto freeandexit;
+
+	if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
+		goto freeandexit;
+
+	/*
+	 * When searching a union mount, need to set the
+	 * start flag at the first call on each layer to
+	 * reset state for the new volume.
+	 */
+	if (uap->options & SRCHFS_START)
+		state->ss_union_layer = 0;
+	else
+		uap->options |= state->ss_union_flags;
+	state->ss_union_flags = 0;
+
+	/*
+	 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
+	 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
+	 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
+	 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
+	 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
+	 *
+	 * NOTE(review): only searchparams1 is inspected below, and the code does not first
+	 * check that sizeofsearchparams1 is large enough to hold the length word plus an
+	 * attrreference_t -- confirm whether that is intentional.
+	 */
+
+	if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
+		attrreference_t* string_ref;
+		u_int32_t* start_length;
+		user64_size_t param_length;
+
+		/* validate searchparams1 */
+		param_length = searchblock.sizeofsearchparams1;
+		/* skip the word that specifies length of the buffer */
+		start_length= (u_int32_t*) searchparams1;
+		start_length= start_length+1;
+		string_ref= (attrreference_t*) start_length;
+
+		/* ensure no negative offsets or too big offsets */
+		if (string_ref->attr_dataoffset < 0 ) {
+			error = EINVAL;
+			goto freeandexit;
+		}
+		if (string_ref->attr_length > MAXPATHLEN) {
+			error = EINVAL;
+			goto freeandexit;
+		}
+
+		/* Check for pointer overflow in the string ref */
+		if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
+			error = EINVAL;
+			goto freeandexit;
+		}
+
+		/* the referenced string must start and end inside searchparams1 */
+		if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
+			error = EINVAL;
+			goto freeandexit;
+		}
+		if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
+			error = EINVAL;
+			goto freeandexit;
+		}
+	}
+
+	/* set up the uio structure which will contain the users return buffer */
+	auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
+
+	nameiflags = 0;
+	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
+	       UIO_USERSPACE, uap->path, ctx);
+
+	error = namei(&nd);
+	if (error)
+		goto freeandexit;
+	vp = nd.ni_vp;
+	nameidone(&nd);
+
+	/*
+	 * Switch to the root vnode for the volume
+	 */
+	error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
+	vnode_put(vp);
+	if (error)
+		goto freeandexit;
+	vp = tvp;
+
+	/*
+	 * If it's a union mount, the path lookup takes
+	 * us to the top layer. But we may need to descend
+	 * to a lower layer. For non-union mounts the layer
+	 * is always zero.
+	 */
+	for (i = 0; i < (int) state->ss_union_layer; i++) {
+		if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
+			break;
+		tvp = vp;
+		vp = vp->v_mount->mnt_vnodecovered;
+		if (vp == NULL) {
+			vnode_put(tvp);
+			error = ENOENT;
+			goto freeandexit;
+		}
+		/* take a reference on the covered vnode before dropping the upper one */
+		error = vnode_getwithref(vp);
+		vnode_put(tvp);
+		if (error)
+			goto freeandexit;
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
+	if (error) {
+		vnode_put(vp);
+		goto freeandexit;
+	}
+#endif
+
+
+	/*
+	 * If searchblock.maxmatches == 0, then skip the search. This has happened
+	 * before and sometimes the underlying code doesn't deal with it well.
+	 */
+	if (searchblock.maxmatches == 0) {
+		nummatches = 0;
+		goto saveandexit;
+	}
+
+	/*
+	 * All right, we have everything we need, so let's make that call.
+	 *
+	 * We keep special track of the return value from the file system:
+	 * EAGAIN is an acceptable error condition that shouldn't keep us
+	 * from copying out any results...
+	 */
+
+	fserror = VNOP_SEARCHFS(vp,
+				searchparams1,
+				searchparams2,
+				&searchblock.searchattrs,
+				(u_long)searchblock.maxmatches,
+				&timelimit,
+				returnattrs,
+				&nummatches,
+				(u_long)uap->scriptcode,
+				(u_long)uap->options,
+				auio,
+				(struct searchstate *) &state->ss_fsstate,
+				ctx);
+
+	/*
+	 * If it's a union mount we need to be called again
+	 * to search the mounted-on filesystem.
+	 */
+	if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
+		state->ss_union_flags = SRCHFS_START;
+		state->ss_union_layer++;	// search next layer down
+		fserror = EAGAIN;
+	}
+
+saveandexit:
+
+	vnode_put(vp);
+
+	/* Now copy out the stuff that needs copying out. That means the number of matches, the
+	   search state. Everything was already put into the return buffer by the vop call. */
+
+	if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
+		goto freeandexit;
+
+	if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
+		goto freeandexit;
+
+	/* copyouts succeeded: report whatever the filesystem said (0, EAGAIN, ...) */
+	error = fserror;
+
+freeandexit:
+
+	FREE(searchparams1,M_TEMP);
+
+	return(error);
+
+
+} /* end of searchfs system call */
+
+#else /* CONFIG_SEARCHFS */
+
+/*
+ * searchfs: stub built when CONFIG_SEARCHFS is not configured.
+ * Every argument is ignored and the call always fails with ENOTSUP.
+ */
+int
+searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
+{
+	return ENOTSUP;
+}
+
+#endif /* CONFIG_SEARCHFS */
+
+
+lck_grp_attr_t * nspace_group_attr; /* group attributes for the nspace lock group; allocated in nspace_handler_init() */
+lck_attr_t * nspace_lock_attr; /* lock attributes used for both nspace mutexes */
+lck_grp_t * nspace_mutex_group; /* the "nspace-mutex" lock group */
+
+lck_mtx_t nspace_handler_lock; /* held while scanning or updating nspace_items[] and passed to msleep() by its waiters */
+lck_mtx_t nspace_handler_exclusion_lock; /* taken around updates of nspace_handlers[].handler_busy */
+
+time_t snapshot_timestamp=0; /* 0 or ~0 makes the snapshot handler bail out with EINVAL; 0 also disables snapshot events */
+int nspace_allow_virtual_devs=0; /* non-zero allows snapshot events for vnodes on MNTK_VIRTUALDEV mounts */
+
+void nspace_handler_init(void);
+
+typedef struct nspace_item_info {
+ struct vnode *vp; /* vnode the event is about; NULL once the slot is released or unblocked. &vp is the sleep/wakeup channel for waiters */
+ void *arg; /* optional uio handed through to the handler (NULL otherwise) */
+ uint64_t op; /* operation bits supplied by the poster; copied out to the handler as its flags word */
+ uint32_t vid; /* vnode_vid() captured when the item was queued; used with vnode_getwithvid() */
+ uint32_t flags; /* NSPACE_ITEM_* state and event-type bits; 0 marks a free slot */
+ uint32_t token; /* set from ++nspace_token_id when a handler claims the item; returned as the error when CANCELLED */
+ uint32_t refcount; /* number of threads currently waiting on this slot */
+} nspace_item_info;
+
+#define MAX_NSPACE_ITEMS 128 /* capacity of the pending-event table */
+nspace_item_info nspace_items[MAX_NSPACE_ITEMS]; /* pending events; accessed with nspace_handler_lock held */
+uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
+uint32_t nspace_token_id=0; /* last token handed out; its address is also the channel slot-starved posters sleep on */
+uint32_t nspace_handler_timeout = 15; // seconds
+
+#define NSPACE_ITEM_NEW 0x0001 /* queued, not yet claimed by a handler */
+#define NSPACE_ITEM_PROCESSING 0x0002 /* claimed by a handler and given a token */
+#define NSPACE_ITEM_DEAD 0x0004 /* NOTE(review): not referenced in this part of the file */
+#define NSPACE_ITEM_CANCELLED 0x0008 /* waiter returns the item's token value as its error */
+#define NSPACE_ITEM_DONE 0x0010 /* waiter returns 0 */
+#define NSPACE_ITEM_RESET_TIMER 0x0020 /* on timeout the waiter clears this bit and waits another interval */
+
+#define NSPACE_ITEM_NSPACE_EVENT 0x0040 /* event is for the NSPACE_HANDLER_NSPACE handler */
+#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080 /* event is for the NSPACE_HANDLER_SNAPSHOT handler */
+
+#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
+
+//#pragma optimization_level 0
+
+typedef enum {
+ NSPACE_HANDLER_NSPACE = 0, /* index of the namespace-event handler in nspace_handlers[] */
+ NSPACE_HANDLER_SNAPSHOT = 1, /* index of the snapshot-event handler in nspace_handlers[] */
+
+ NSPACE_HANDLER_COUNT, /* number of handler types; sizes nspace_handlers[] */
+} nspace_type_t;
+
+typedef struct {
+ uint64_t handler_tid; /* thread_tid() of the thread that registered; zeroed when its process exits */
+ struct proc *handler_proc; /* process registered as handler, or NULL when there is none */
+ int handler_busy; /* non-zero while a thread is inside wait_for_namespace_event() for this type */
+} nspace_handler_t;
+
+nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT]; /* one entry per nspace_type_t value */
+
+/* namespace fsctl functions: forward declarations for the file-local helpers used by the handler code */
+static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
+static int nspace_item_flags_for_type(nspace_type_t nspace_type);
+static int nspace_open_flags_for_type(nspace_type_t nspace_type);
+static nspace_type_t nspace_type_for_op(uint64_t op);
+static int nspace_is_special_process(struct proc *proc);
+static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
+static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
+static int validate_namespace_args (int is64bit, int size);
+static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
+
+
+/*
+ * nspace_flags_matches_handler: decide whether an item's flags mark it as
+ * an event for the given handler type.
+ *
+ * Only the event-type bits (NSPACE_ITEM_ALL_EVENT_TYPES) of event_flags
+ * are considered, and they must equal exactly the bit belonging to
+ * nspace_type.  Returns non-zero on a match, 0 otherwise; an unknown
+ * type is logged and treated as "no match".
+ */
+static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
+{
+	uint32_t wanted;
+
+	if (nspace_type == NSPACE_HANDLER_NSPACE) {
+		wanted = NSPACE_ITEM_NSPACE_EVENT;
+	} else if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
+		wanted = NSPACE_ITEM_SNAPSHOT_EVENT;
+	} else {
+		printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
+		return 0;
+	}
+
+	return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == wanted;
+}
+
+/*
+ * nspace_item_flags_for_type: map a handler type to the NSPACE_ITEM_*
+ * event-type bit stored in an item's flags.  An unknown type is logged
+ * and yields 0.
+ */
+static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
+{
+	if (nspace_type == NSPACE_HANDLER_NSPACE) {
+		return NSPACE_ITEM_NSPACE_EVENT;
+	}
+	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
+		return NSPACE_ITEM_SNAPSHOT_EVENT;
+	}
+
+	printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
+	return 0;
+}
+
+/*
+ * nspace_open_flags_for_type: choose the open mode used when a vnode is
+ * opened on behalf of a handler of the given type.  The namespace handler
+ * gets read/write, the snapshot handler read-only; both carry O_EVTONLY.
+ * An unknown type is logged and yields 0.
+ */
+static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
+{
+	if (nspace_type == NSPACE_HANDLER_NSPACE) {
+		return FREAD | FWRITE | O_EVTONLY;
+	}
+	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
+		return FREAD | O_EVTONLY;
+	}
+
+	printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
+	return 0;
+}
+
+/*
+ * nspace_type_for_op: derive the handler type from the event-type bits of
+ * an operation word (op & NAMESPACE_HANDLER_EVENT_TYPE_MASK).  An
+ * unrecognised value is logged and falls back to NSPACE_HANDLER_NSPACE.
+ */
+static inline nspace_type_t nspace_type_for_op(uint64_t op)
+{
+	uint64_t event_type = op & NAMESPACE_HANDLER_EVENT_TYPE_MASK;
+
+	if (event_type == NAMESPACE_HANDLER_NSPACE_EVENT) {
+		return NSPACE_HANDLER_NSPACE;
+	}
+	if (event_type == NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
+		return NSPACE_HANDLER_SNAPSHOT;
+	}
+
+	printf("nspace_type_for_op: invalid op mask %llx\n", event_type);
+	return NSPACE_HANDLER_NSPACE;
+}
+
+/*
+ * nspace_is_special_process: return 1 if 'proc' is currently registered
+ * as the handler process for any handler type, 0 otherwise.
+ */
+static inline int nspace_is_special_process(struct proc *proc)
+{
+	int type = 0;
+
+	while (type < NSPACE_HANDLER_COUNT) {
+		if (nspace_handlers[type].handler_proc == proc) {
+			return 1;
+		}
+		type++;
+	}
+
+	return 0;
+}
+
+/*
+ * nspace_handler_init: one-time setup of the namespace-handler machinery.
+ * Allocates the lock attribute and lock group objects, initialises both
+ * nspace mutexes with them, and clears the pending-item table.
+ */
+void
+nspace_handler_init(void)
+{
+	/* start with an empty item table: flags == 0 marks every slot free */
+	memset(&nspace_items[0], 0, sizeof(nspace_items));
+
+	nspace_lock_attr = lck_attr_alloc_init();
+	nspace_group_attr = lck_grp_attr_alloc_init();
+	nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
+
+	/* both mutexes share the same group and attributes */
+	lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
+	lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
+}
+
+/*
+ * nspace_proc_exit: clean up after process 'p' if it was registered as a
+ * namespace and/or snapshot handler.
+ *
+ * The handler registration(s) held by p are cleared.  If there were any,
+ * every queued or in-progress item of the affected event type(s) is
+ * marked NSPACE_ITEM_DONE and its waiters are woken, so nobody stays
+ * blocked on a handler that no longer exists.  If p was the snapshot
+ * handler, snapshot_timestamp is also reset to 0.  Does nothing when p
+ * was not a handler.
+ */
+void
+nspace_proc_exit(struct proc *p)
+{
+	int idx;
+	int dead_types = 0;
+
+	/* drop p's registrations and remember which event types it served */
+	for (idx = 0; idx < NSPACE_HANDLER_COUNT; idx++) {
+		if (nspace_handlers[idx].handler_proc != p) {
+			continue;
+		}
+		dead_types |= nspace_item_flags_for_type(idx);
+		nspace_handlers[idx].handler_tid = 0;
+		nspace_handlers[idx].handler_proc = NULL;
+	}
+
+	if (dead_types == 0) {
+		return;
+	}
+
+	lck_mtx_lock(&nspace_handler_lock);
+
+	if (dead_types & NSPACE_ITEM_SNAPSHOT_EVENT) {
+		// if this process was the snapshot handler, zero snapshot_timeout
+		snapshot_timestamp = 0;
+	}
+
+	//
+	// unblock anyone that's waiting for the handler that died
+	//
+	for (idx = 0; idx < MAX_NSPACE_ITEMS; idx++) {
+		nspace_item_info *item = &nspace_items[idx];
+
+		/* only items that are still queued or being processed matter */
+		if ((item->flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) == 0) {
+			continue;
+		}
+		/* ...and only those destined for a handler type that just went away */
+		if ((item->flags & dead_types) == 0) {
+			continue;
+		}
+
+		if (item->vp && (item->vp->v_flag & VNEEDSSNAPSHOT)) {
+			vnode_lock_spin(item->vp);
+			item->vp->v_flag &= ~VNEEDSSNAPSHOT;
+			vnode_unlock(item->vp);
+		}
+
+		item->vp = NULL;
+		item->vid = 0;
+		item->flags = NSPACE_ITEM_DONE;
+		item->token = 0;
+
+		wakeup((caddr_t)&(item->vp));
+	}
+
+	wakeup((caddr_t)&nspace_item_idx);
+	lck_mtx_unlock(&nspace_handler_lock);
+}
+
+
+/*
+ * resolve_nspace_item: convenience form of resolve_nspace_item_ext() for
+ * callers that have no extra argument to pass along with the event.
+ * Returns whatever resolve_nspace_item_ext() returns.
+ */
+int
+resolve_nspace_item(struct vnode *vp, uint64_t op)
+{
+	void *no_arg = NULL;
+
+	return resolve_nspace_item_ext(vp, op, no_arg);
+}
+/*
+ * resolve_nspace_item_ext: post an event for 'vp' to the handler selected
+ * by 'op' and sleep until that handler deals with it.
+ *
+ * Parameters: vp vnode the event is about
+ * op operation word; its event-type bits pick the handler
+ * arg NSPACE_REARM_NO_ARG, NULL, or a value stored in the item for the handler
+ *
+ * Returns: 0 nothing to do (unsupported vnode type, snapshot event on a
+ * virtual-device mount, no handler registered), or the
+ * item was marked NSPACE_ITEM_DONE
+ * EDEADLK the caller is itself one of the handler processes
+ * ETIMEDOUT no completion within nspace_handler_timeout seconds
+ * !0 the item's token value if it was CANCELLED, or the error
+ * returned by msleep()
+ *
+ * Several threads may wait on the same (vp, op) pair; they share one slot
+ * of nspace_items[] and the slot is recycled when the last waiter leaves.
+ */
+int
+resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
+{
+ int i, error, keep_waiting;
+ struct timespec ts;
+ nspace_type_t nspace_type = nspace_type_for_op(op);
+
+ // only allow namespace events on regular files, directories and symlinks.
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+ return 0;
+ }
+
+ //
+ // if this is a snapshot event and the vnode is on a
+ // disk image just pretend nothing happened since any
+ // change to the disk image will cause the disk image
+ // itself to get backed up and this avoids multi-way
+ // deadlocks between the snapshot handler and the ever
+ // popular diskimages-helper process. the variable
+ // nspace_allow_virtual_devs allows this behavior to
+ // be overridden (for use by the Mobile TimeMachine
+ // testing infrastructure which uses disk images)
+ //
+ if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
+ && (vp->v_mount != NULL)
+ && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
+ && !nspace_allow_virtual_devs) {
+
+ return 0;
+ }
+
+ // if (thread_tid(current_thread()) == namespace_handler_tid) {
+ if (nspace_handlers[nspace_type].handler_proc == NULL) { /* nobody is registered to handle this event type */
+ return 0;
+ }
+
+ if (nspace_is_special_process(current_proc())) { /* a handler must not block waiting on a handler */
+ return EDEADLK;
+ }
+
+ lck_mtx_lock(&nspace_handler_lock);
+
+retry:
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) { /* first look for an item already queued for the same vnode and op */
+ if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
+ break;
+ }
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) { /* none queued: look for a free slot (flags == 0) instead */
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].flags == 0) {
+ break;
+ }
+ }
+ } else {
+ nspace_items[i].refcount++; /* join the existing item as one more waiter */
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) { /* table full: wait for a slot, bounded by the handler timeout */
+ ts.tv_sec = nspace_handler_timeout;
+ ts.tv_nsec = 0;
+
+ error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
+ if (error == 0) {
+ // an entry got free'd up, go see if we can get a slot
+ goto retry;
+ } else {
+ lck_mtx_unlock(&nspace_handler_lock);
+ return error;
+ }
+ }
+
+ //
+ // if it didn't already exist, add it. if it did exist
+ // we'll get woken up when someone does a wakeup() on
+ // the slot in the nspace_items table.
+ //
+ if (vp != nspace_items[i].vp) {
+ nspace_items[i].vp = vp;
+ nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
+ nspace_items[i].op = op;
+ nspace_items[i].vid = vnode_vid(vp);
+ nspace_items[i].flags = NSPACE_ITEM_NEW;
+ nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
+ if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
+ if (arg) { /* note: tests the caller's arg, not the value stored in the item */
+ vnode_lock_spin(vp);
+ vp->v_flag |= VNEEDSSNAPSHOT;
+ vnode_unlock(vp);
+ }
+ }
+
+ nspace_items[i].token = 0;
+ nspace_items[i].refcount = 1;
+
+ wakeup((caddr_t)&nspace_item_idx); /* handlers sleep on this address while the table has nothing for them */
+ }
+
+ //
+ // Now go to sleep until the handler does a wakeup on this
+ // slot in the nspace_items table (or we timeout).
+ //
+ keep_waiting = 1;
+ while(keep_waiting) {
+ ts.tv_sec = nspace_handler_timeout;
+ ts.tv_nsec = 0;
+ error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
+
+ if (nspace_items[i].flags & NSPACE_ITEM_DONE) { /* the item's state takes precedence over msleep's result */
+ error = 0;
+ } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
+ error = nspace_items[i].token; /* for a cancelled item the token field is returned as the error */
+ } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
+ if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) { /* timer reset requested: consume the flag and wait again */
+ nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
+ continue;
+ } else {
+ error = ETIMEDOUT; /* both timeout spellings are reported as ETIMEDOUT */
+ }
+ } else if (error == 0) {
+ // hmmm, why did we get woken up?
+ printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
+ nspace_items[i].token);
+ }
+
+ if (--nspace_items[i].refcount == 0) { /* last waiter out recycles the slot */
+ nspace_items[i].vp = NULL; // clear this so that no one will match on it again
+ nspace_items[i].arg = NULL;
+ nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
+ nspace_items[i].flags = 0; // this clears it for re-use
+ }
+ wakeup(&nspace_token_id); /* wake threads sleeping in the table-full path above */
+ keep_waiting = 0;
+ }
+
+ lck_mtx_unlock(&nspace_handler_lock);
+
+ return error;
+}
+
+/*
+ * nspace_snapshot_event: give the snapshot handler a chance to act on
+ * 'vp' before the caller goes ahead with a modification.
+ *
+ * Parameters:	vp	vnode about to be modified (NULL is accepted and ignored)
+ *		ctime	change time of the vnode; 0 disables the check
+ *		op_type	operation bits; NAMESPACE_HANDLER_SNAPSHOT_EVENT is or'ed in
+ *		arg	passed through to resolve_nspace_item_ext()
+ *
+ * Returns:	0	no event was needed, the handler finished, or the wait
+ *			failed for any reason other than a signal
+ *		EINTR	the wait for the handler was interrupted
+ *
+ * An event is posted only when ctime and snapshot_timestamp are both
+ * non-zero and either ctime <= snapshot_timestamp or the vnode reports
+ * vnode_needssnapshots().  Swap files never generate events.
+ */
+int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
+{
+	int snapshot_error = 0;
+
+	if (vp == NULL) {
+		return 0;
+	}
+
+	/* Swap files are special; skip them */
+	if (vnode_isswap(vp)) {
+		return 0;
+	}
+
+	if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+		// the change time is within this epoch
+		int error;
+
+		error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+		if (error == EAGAIN || error == ETIMEDOUT) {
+			/*
+			 * resolve_nspace_item_ext() reports a timeout of its main
+			 * wait as ETIMEDOUT, while a timeout waiting for a free
+			 * table slot comes back as msleep()'s own timeout value.
+			 * Both mean the handler did not respond in time.
+			 */
+			printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
+		} else if (error == EINTR) {
+			// printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
+			snapshot_error = EINTR;
+		}
+		/* EDEADLK (we are a handler ourselves) and all other errors are deliberately not propagated */
+	}
+
+	return snapshot_error;
+}
+
+/*
+ * get_nspace_item_status: look up the pending item for 'vp' and report
+ * its flags word.
+ *
+ * Returns:	0	*status holds the NSPACE_ITEM_* flags of the first
+ *			table entry whose vnode is vp
+ *		ENOENT	no entry refers to vp; *status is left untouched
+ */
+int
+get_nspace_item_status(struct vnode *vp, int32_t *status)
+{
+	int slot;
+	int error = ENOENT;
+
+	lck_mtx_lock(&nspace_handler_lock);
+
+	for (slot = 0; slot < MAX_NSPACE_ITEMS; slot++) {
+		if (nspace_items[slot].vp != vp) {
+			continue;
+		}
+		*status = nspace_items[slot].flags;
+		error = 0;
+		break;
+	}
+
+	lck_mtx_unlock(&nspace_handler_lock);
+
+	return error;
+}
+
+/*
+ * build_volfs_path: compiled out by the #if 0 below; kept for reference.
+ *
+ * Formats "/.vol/<fsid>/<fileid>" for 'vp' into 'path'.  On entry *len is
+ * passed to snprintf() as the buffer size; on return it holds snprintf()'s
+ * result plus one.  Returns 0 on success, or -1 after writing a placeholder
+ * path when vnode_getattr() fails.
+ *
+ * NOTE(review): if snprintf() here returns the untruncated length, as the
+ * standard one does, *len can come back larger than the buffer -- confirm
+ * before reviving this code.
+ */
+#if 0
+static int
+build_volfs_path(struct vnode *vp, char *path, int *len)
+{
+ struct vnode_attr va;
+ int ret;
+
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_fsid);
+ VATTR_WANTED(&va, va_fileid);
+
+ if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
+ *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
+ ret = -1;
+ } else {
+ *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
+ ret = 0;
+ }
+
+ return ret;
+}
+#endif
+
+//
+// vn_open_with_vp: open a vnode the caller already holds, without going
+// through a path lookup.
+//
+// Because there is no lookup, permissions on the parent directories
+// leading to this vnode are NOT checked.  The call is therefore limited
+// to the superuser (suser() is checked first); otherwise a process could
+// reach a file that is itself readable even though its parent
+// directories would have denied access.
+//
+// The open is authorized against the vnode (MAC check when configured,
+// then vnode_authorize for the read/write/append rights implied by
+// fmode), the filesystem is asked to open it, and a usecount reference
+// matching fmode is taken.  Returns 0 on success or the first error
+// encountered; if taking the reference fails the filesystem open is
+// undone with VNOP_CLOSE.
+//
+static int
+vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
+{
+	int error;
+	int action = 0;
+
+	error = suser(kauth_cred_get(), &(current_proc()->p_acflag));
+	if (error) {
+		return error;
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_open(ctx, vp, fmode);
+	if (error)
+		return error;
+#endif
+
+	/* work out which rights the open mode requires */
+	if (fmode & FREAD) {
+		action |= KAUTH_VNODE_READ_DATA;
+	}
+	if (fmode & (FWRITE | O_TRUNC)) {
+		/*
+		 * A writer that appends and does not truncate only needs
+		 * append rights; asking for that instead of write rights
+		 * keeps the open from being denied when UF_APPEND or
+		 * SF_APPEND is set on the file.
+		 */
+		int append_only = (fmode & O_APPEND) && !(fmode & O_TRUNC);
+
+		if (append_only) {
+			action |= KAUTH_VNODE_APPEND_DATA;
+		} else {
+			action |= KAUTH_VNODE_WRITE_DATA;
+		}
+	}
+
+	error = vnode_authorize(vp, NULL, action, ctx);
+	if (error != 0)
+		return error;
+
+	//
+	// A vnode tagged VOPENEVT opened by a process with P_CHECKOPENEVT
+	// set gets O_EVTONLY or'ed into the open mode, so that this open
+	// does not count against the vnode when carbon delete() uses
+	// vnode_isinuse() to decide whether a file is busy.  This lets
+	// spotlight importers coexist with carbon apps that rely on the
+	// no-delete-if-busy semantics of carbon delete().
+	//
+	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
+		fmode |= O_EVTONLY;
+	}
+
+	error = VNOP_OPEN(vp, fmode, ctx);
+	if (error) {
+		return error;
+	}
+
+	error = vnode_ref_ext(vp, fmode, 0);
+	if (error) {
+		/* could not take the reference: undo the filesystem open */
+		VNOP_CLOSE(vp, fmode, ctx);
+		return error;
+	}
+
+	/*
+	 * Let interested third parties know about the open.  The result
+	 * of the kauth_authorize_fileop call is intentionally ignored.
+	 */
+#if CONFIG_MACF
+	mac_vnode_notify_open(ctx, vp, fmode);
+#endif
+	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
+			       (uintptr_t)vp, 0);
+
+	return 0;
+}
+/*
+ * wait_for_namespace_event: called on behalf of a handler process; blocks
+ * until an event of type 'nspace_type' is queued, opens the event's vnode
+ * into a new file descriptor of the calling process and copies the event
+ * description out to the user addresses held in 'nhd'.
+ *
+ * Parameters: nhd user addresses for token, flags and fd, plus the
+ * optional infoptr and objid (0 when not supplied)
+ * nspace_type which handler the caller is acting as
+ *
+ * Returns: 0 an event was handed to the caller
+ * EBUSY another thread is already waiting for this type
+ * EINVAL snapshot handler, and snapshot_timestamp is 0 or ~0
+ * !0 error from msleep(), vnode_getwithvid(), vn_open_with_vp(),
+ * falloc(), copyout() or vnode_getattr()
+ *
+ * If handing the event over fails after an item has been claimed, the item
+ * is marked NSPACE_ITEM_DONE and its waiters are woken.  On success the
+ * item is left in the NSPACE_ITEM_PROCESSING state.
+ *
+ * NOTE(review): nspace_handler_lock is held from the registration step
+ * until just before the busy flag is cleared, which includes the vnode
+ * open and the copyout() calls -- confirm this is intended.
+ */
+static int
+wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
+{
+ int i;
+ int error = 0;
+ int unblock = 0;
+ task_t curtask;
+
+ lck_mtx_lock(&nspace_handler_exclusion_lock); /* only one thread per handler type may wait at a time */
+ if (nspace_handlers[nspace_type].handler_busy) {
+ lck_mtx_unlock(&nspace_handler_exclusion_lock);
+ return EBUSY;
+ }
+
+ nspace_handlers[nspace_type].handler_busy = 1;
+ lck_mtx_unlock(&nspace_handler_exclusion_lock);
+
+ /*
+ * Any process that gets here will be one of the namespace handlers.
+ * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
+ * as we can cause deadlocks to occur, because the namespace handler may prevent
+ * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
+ * process.
+ */
+ curtask = current_task();
+ bsd_set_dependency_capable (curtask);
+
+ lck_mtx_lock(&nspace_handler_lock);
+ if (nspace_handlers[nspace_type].handler_proc == NULL) { /* first caller for this type becomes the registered handler */
+ nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
+ nspace_handlers[nspace_type].handler_proc = current_proc();
+ }
+
+ if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
+ (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+ error = EINVAL; /* skips the wait loop below */
+ }
+
+ while (error == 0) {
+
+ /* Try to find matching namespace item */
+ for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
+ if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+ break;
+ }
+ }
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) {
+ /* Nothing is there yet. Wait for wake up and retry */
+ error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
+ if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+ /* Prevent infinite loop if snapshot handler exited */
+ error = EINVAL;
+ break;
+ }
+ continue; /* a non-zero msleep() result ends the loop via the while condition */
+ }
+
+ nspace_items[i].flags &= ~NSPACE_ITEM_NEW; /* claim the item: NEW -> PROCESSING, with a fresh token */
+ nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
+ nspace_items[i].token = ++nspace_token_id;
+
+ assert(nspace_items[i].vp);
+ struct fileproc *fp;
+ int32_t indx;
+ int32_t fmode;
+ struct proc *p = current_proc();
+ vfs_context_t ctx = vfs_context_current();
+ struct vnode_attr va;
+ bool vn_get_succsessful = false; /* these three record how far setup got, for the cleanup code */
+ bool vn_open_successful = false;
+ bool fp_alloc_successful = false;
+
+ /*
+ * Use vnode pointer to acquire a file descriptor for
+ * hand-off to userland
+ */
+ fmode = nspace_open_flags_for_type(nspace_type);
+ error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
+ if (error) goto cleanup;
+ vn_get_succsessful = true;
+
+ error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
+ if (error) goto cleanup;
+ vn_open_successful = true;
+
+ error = falloc(p, &fp, &indx, ctx);
+ if (error) goto cleanup;
+ fp_alloc_successful = true;
+
+ fp->f_fglob->fg_flag = fmode;
+ fp->f_fglob->fg_ops = &vnops;
+ fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
+
+ proc_fdlock(p);
+ procfdtbl_releasefd(p, indx, NULL);
+ fp_drop(p, indx, fp, 1);
+ proc_fdunlock(p);
+
+ /*
+ * All variants of the namespace handler struct support these three fields:
+ * token, flags, and the FD pointer
+ */
+ error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
+ if (error) goto cleanup;
+ error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
+ if (error) goto cleanup;
+ error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
+ if (error) goto cleanup;
+
+ /*
+ * Handle optional fields:
+ * the extended version supports an info ptr (offset, length), and
+ * the namedata version supports a unique per-link object ID
+ */
+ if (nhd->infoptr) {
+ uio_t uio = (uio_t)nspace_items[i].arg;
+ uint64_t u_offset, u_length;
+
+ if (uio) {
+ u_offset = uio_offset(uio);
+ u_length = uio_resid(uio);
+ } else {
+ u_offset = 0;
+ u_length = 0;
+ }
+ error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t)); /* offset first, length right after it */
+ if (error) goto cleanup;
+ error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
+ if (error) goto cleanup;
+ }
+
+ if (nhd->objid) {
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_linkid);
+ error = vnode_getattr(nspace_items[i].vp, &va, ctx);
+ if (error) goto cleanup;
+
+ uint64_t linkid = 0; /* reported as 0 when the filesystem does not supply va_linkid */
+ if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
+ linkid = (uint64_t)va.va_linkid;
+ }
+ error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
+ }
+cleanup:
+ if (error) { /* undo whatever was set up and arrange for the waiters to be released */
+ if (fp_alloc_successful) fp_free(p, indx, fp);
+ if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
+ unblock = 1;
+ }
+
+ if (vn_get_succsessful) vnode_put(nspace_items[i].vp); /* drop the iocount taken by vnode_getwithvid() */
+
+ break; /* one event per call, whether it succeeded or failed */
+ }
+
+ if (unblock) { /* hand-off failed: mark the claimed item DONE and wake its waiters */
+ if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
+ vnode_lock_spin(nspace_items[i].vp);
+ nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+ vnode_unlock(nspace_items[i].vp);
+ }
+ nspace_items[i].vp = NULL;
+ nspace_items[i].vid = 0;
+ nspace_items[i].flags = NSPACE_ITEM_DONE;
+ nspace_items[i].token = 0;
+
+ wakeup((caddr_t)&(nspace_items[i].vp));
+ }
+
+ if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
+ // just go through every snapshot event and unblock it immediately.
+ if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+ for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
+ if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+ nspace_items[i].vp = NULL;
+ nspace_items[i].vid = 0;
+ nspace_items[i].flags = NSPACE_ITEM_DONE;
+ nspace_items[i].token = 0;
+
+ wakeup((caddr_t)&(nspace_items[i].vp));
+ }
+ }
+ }
+ }
+ }
+
+ lck_mtx_unlock(&nspace_handler_lock);
+
+ lck_mtx_lock(&nspace_handler_exclusion_lock); /* let another thread wait for this handler type */
+ nspace_handlers[nspace_type].handler_busy = 0;
+ lck_mtx_unlock(&nspace_handler_exclusion_lock);
+
+ return error;
+}
+
+/*
+ * validate_namespace_args: check that 'size' is exactly the size of one
+ * of the namespace handler structures (basic info, extended info, or
+ * full handler data) for the caller's address-space width.
+ *
+ * Returns 0 when the size is recognised, EINVAL otherwise.
+ */
+static inline int validate_namespace_args (int is64bit, int size) {
+
+	if (is64bit) {
+		/* 64 bit -- must be one of these */
+		if (size == sizeof(user64_namespace_handler_info) ||
+		    size == sizeof(user64_namespace_handler_info_ext) ||
+		    size == sizeof(user64_namespace_handler_data)) {
+			return 0;
+		}
+		return EINVAL;
+	}
+
+	/* 32 bit -- must be one of these */
+	if (size == sizeof(user32_namespace_handler_info) ||
+	    size == sizeof(user32_namespace_handler_info_ext) ||
+	    size == sizeof(user32_namespace_handler_data)) {
+		return 0;
+	}
+	return EINVAL;
+
+}
+
+/*
+ * process_namespace_fsctl: common entry for the namespace-handler fsctls.
+ *
+ * Parameters:	nspace_type	which handler the caller is acting as
+ *		is64bit		non-zero if 'data' uses the 64-bit user layout
+ *		size		size of the structure in 'data'
+ *		data		kernel copy of the user's handler structure
+ *
+ * Returns:	the suser() error for non-superuser callers, EINVAL for an
+ *		unrecognised structure size, otherwise whatever
+ *		wait_for_namespace_event() returns.
+ *
+ * The user addresses found in 'data' are gathered into a
+ * namespace_handler_data; fields the caller's structure variant does not
+ * carry stay zero.
+ */
+static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
+{
+	namespace_handler_data nhd;
+	int error;
+
+	/* optional fields that are not filled in below must read as 0 */
+	bzero (&nhd, sizeof(namespace_handler_data));
+
+	error = suser(kauth_cred_get(), &(current_proc()->p_acflag));
+	if (error) {
+		return error;
+	}
+
+	error = validate_namespace_args (is64bit, size);
+	if (error) {
+		return error;
+	}
+
+	/* Gather the userland pointers into our kernel-only struct */
+
+	if (is64bit) {
+		/* 64 bit userland structures */
+		user64_namespace_handler_info *info64 = (user64_namespace_handler_info *)data;
+
+		nhd.token = (user_addr_t)info64->token;
+		nhd.flags = (user_addr_t)info64->flags;
+		nhd.fdptr = (user_addr_t)info64->fdptr;
+
+		/* Anything larger than the standard info struct carries extra fields */
+		if (size > (sizeof(user64_namespace_handler_info))) {
+			if (size >= (sizeof(user64_namespace_handler_info_ext))) {
+				nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
+			}
+			if (size == (sizeof(user64_namespace_handler_data))) {
+				nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
+			}
+		}
+	} else {
+		/* 32 bit userland structures */
+		user32_namespace_handler_info *info32 = (user32_namespace_handler_info *)data;
+
+		nhd.token = CAST_USER_ADDR_T(info32->token);
+		nhd.flags = CAST_USER_ADDR_T(info32->flags);
+		nhd.fdptr = CAST_USER_ADDR_T(info32->fdptr);
+
+		if (size > (sizeof(user32_namespace_handler_info))) {
+			if (size >= (sizeof(user32_namespace_handler_info_ext))) {
+				nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
+			}
+			if (size == (sizeof(user32_namespace_handler_data))) {
+				nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
+			}
+		}
+	}
+
+	return wait_for_namespace_event(&nhd, nspace_type);
+}
+
+/*
+ * Make a filesystem-specific control call:
+ *
+ * Common back end for fsctl() and ffsctl().  The argument size is the
+ * length encoded in 'cmd' (IOCPARM_LEN), possibly raised for a few known
+ * commands.  A staging buffer of that size is set up (the on-stack
+ * 'stkbuf' when it fits, otherwise kalloc'ed memory), prepared according
+ * to the IOC_IN / IOC_OUT / IOC_VOID bits of 'cmd', and then handed either
+ * to one of the generic FSCTL_* handlers in the switch below or, for any
+ * other command, to the filesystem through VNOP_IOCTL().  On success, and
+ * when IOC_OUT is set with a non-zero size, the buffer is copied back out
+ * to 'udata'.
+ *
+ * Parameters: p Calling process
+ * arg_vp Pointer to the vnode to operate on.  *arg_vp is
+ * set to NULL when this routine has already done the
+ * vnode_put() itself (FSCTL_SYNC_VOLUME), so that
+ * the caller does not release it a second time.
+ * cmd Command word, including direction bits and length
+ * udata User address of the argument (or the argument
+ * value itself when the encoded length is 0)
+ * options Passed through to VNOP_IOCTL() for commands that
+ * are not handled generically
+ * ctx VFS context of the caller
+ *
+ * Returns: 0 Success
+ * EINVAL Encoded length exceeds IOCPARM_MAX
+ * ENOMEM Staging buffer could not be allocated
+ * !0 errno from copyin(), copyout() or the
+ * selected handler
+ */
+/* ARGSUSED */
+static int
+fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
+{
+ int error=0;
+ boolean_t is64bit;
+ u_int size;
+#define STK_PARAMS 128
+ char stkbuf[STK_PARAMS] = {0};
+ caddr_t data, memp;
+ vnode_t vp = *arg_vp;
+
+ size = IOCPARM_LEN(cmd);
+ if (size > IOCPARM_MAX) return (EINVAL);
+
+ is64bit = proc_is64bit(p);
+
+ memp = NULL; /* stays NULL unless the buffer is kalloc'ed below */
+
+
+ /*
+ * ensure the buffer is large enough for underlying calls
+ */
+#ifndef HFSIOC_GETPATH
+ typedef char pn_t[MAXPATHLEN];
+#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
+#endif
+
+#ifndef HFS_GETPATH
+#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
+#endif
+ if (IOCBASECMD(cmd) == HFS_GETPATH) {
+ /* Round up to MAXPATHLEN regardless of user input */
+ size = MAXPATHLEN;
+ }
+ else if (vp->v_tag == VT_CIFS) {
+ /*
+ * XXX Until fsctl's length encoding can be
+ * XXX fixed properly.
+ */
+ if (IOCBASECMD(cmd) == _IOWR('z', 19, 0) && size < 1432) {
+ size = 1432; /* sizeof(struct UniqueSMBShareID) */
+ } else if (IOCBASECMD(cmd) == _IOWR('z', 28, 0) && size < 308) {
+ size = 308; /* sizeof(struct smbDebugTestPB) */
+ }
+ }
+
+ if (size > sizeof (stkbuf)) {
+ if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
+ data = memp;
+ } else {
+ data = &stkbuf[0];
+ };
+
+ if (cmd & IOC_IN) {
+ if (size) {
+ error = copyin(udata, data, size);
+ if (error) {
+ if (memp) {
+ kfree (memp, size);
+ }
+ return error;
+ }
+ } else { /* zero-length IOC_IN: the buffer receives the value of udata itself */
+ if (is64bit) {
+ *(user_addr_t *)data = udata;
+ }
+ else {
+ *(uint32_t *)data = (uint32_t)udata;
+ }
+ };
+ } else if ((cmd & IOC_OUT) && size) {
+ /*
+ * Zero the buffer so the user always
+ * gets back something deterministic.
+ */
+ bzero(data, size);
+ } else if (cmd & IOC_VOID) { /* no data transfer: the buffer receives the value of udata itself */
+ if (is64bit) {
+ *(user_addr_t *)data = udata;
+ }
+ else {
+ *(uint32_t *)data = (uint32_t)udata;
+ }
+ }
+
+ /* Check to see if it's a generic command */
+ switch (IOCBASECMD(cmd)) {
+
+ case FSCTL_SYNC_VOLUME: {
+ mount_t mp = vp->v_mount;
+ int arg = *(uint32_t*)data; /* tested against FSCTL_SYNC_WAIT and FSCTL_SYNC_FULLSYNC below */
+
+ /* record vid of vp so we can drop it below. */
+ uint32_t vvid = vp->v_id;
+
+ /*
+ * Then grab mount_iterref so that we can release the vnode.
+ * Without this, a thread may call vnode_iterate_prepare then
+ * get into a deadlock because we've never released the root vp
+ */
+ error = mount_iterref (mp, 0);
+ if (error) {
+ break; /* vp has not been released on this path; *arg_vp is left as is */
+ }
+ vnode_put(vp);
+
+ /* issue the sync for this volume */
+ (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
+
+ /*
+ * Then release the mount_iterref once we're done syncing; it's not
+ * needed for the VNOP_IOCTL below
+ */
+ mount_iterdrop(mp);
+
+ if (arg & FSCTL_SYNC_FULLSYNC) {
+ /* re-obtain vnode iocount on the root vp, if possible */
+ error = vnode_getwithvid (vp, vvid);
+ if (error == 0) {
+ error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
+ vnode_put (vp);
+ }
+ }
+ /* mark the argument VP as having been released */
+ *arg_vp = NULL;
+ }
+ break;
+
+ case FSCTL_ROUTEFS_SETROUTEID: {
+#if ROUTEFS
+ char routepath[MAXPATHLEN];
+ size_t len = 0;
+
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ break;
+ }
+ bzero(routepath, MAXPATHLEN);
+ error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
+ if (error) {
+ break;
+ }
+ error = routefs_kernel_mount(routepath);
+ if (error) {
+ break;
+ }
+#endif
+ }
+ break;
+
+ case FSCTL_SET_PACKAGE_EXTS: {
+ user_addr_t ext_strings;
+ uint32_t num_entries;
+ uint32_t max_width;
+
+ if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
+ break;
+
+ if ( (is64bit && size != sizeof(user64_package_ext_info))
+ || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
+
+ // either you're 64-bit and passed a 64-bit struct or
+ // you're 32-bit and passed a 32-bit struct. otherwise
+ // it's not ok.
+ error = EINVAL;
+ break;
+ }
+
+ if (is64bit) {
+ ext_strings = ((user64_package_ext_info *)data)->strings;
+ num_entries = ((user64_package_ext_info *)data)->num_entries;
+ max_width = ((user64_package_ext_info *)data)->max_width;
+ } else {
+ ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
+ num_entries = ((user32_package_ext_info *)data)->num_entries;
+ max_width = ((user32_package_ext_info *)data)->max_width;
+ }
+ error = set_package_extensions_table(ext_strings, num_entries, max_width);
+ }
+ break;
+
+ /* namespace handlers */
+ case FSCTL_NAMESPACE_HANDLER_GET: {
+ error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
+ }
+ break;
+
+ /* Snapshot handlers */
+ case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
+ error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
+ }
+ break;
+
+ case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
+ error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
+ }
+ break;
+
+ case FSCTL_NAMESPACE_HANDLER_UPDATE: {
+ uint32_t token, val;
+ int i;
+
+ if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+ break;
+ }
+
+ if (!nspace_is_special_process(p)) {
+ error = EINVAL;
+ break;
+ }
+
+ token = ((uint32_t *)data)[0];
+ val = ((uint32_t *)data)[1]; /* fetched but not otherwise used by this command */
+
+ lck_mtx_lock(&nspace_handler_lock);
+
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].token == token) {
+ break; /* exit for loop, not case stmt */
+ }
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) {
+ error = ENOENT;
+ } else {
+ //
+ // if this bit is set, when resolve_nspace_item() times out
+ // it will loop and go back to sleep.
+ //
+ nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
+ }
+
+ lck_mtx_unlock(&nspace_handler_lock);
+
+ if (error) {
+ printf("nspace-handler-update: did not find token %u\n", token);
+ }
+ }
+ break;
+
+ case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
+ uint32_t token, val;
+ int i;
+
+ if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+ break;
+ }
+
+ if (!nspace_is_special_process(p)) {
+ error = EINVAL;
+ break;
+ }
+
+ token = ((uint32_t *)data)[0];
+ val = ((uint32_t *)data)[1];
+
+ lck_mtx_lock(&nspace_handler_lock);
+
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].token == token) {
+ break; /* exit for loop, not case statement */
+ }
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) {
+ printf("nspace-handler-unblock: did not find token %u\n", token);
+ error = ENOENT;
+ } else {
+ if (val == 0 && nspace_items[i].vp) {
+ vnode_lock_spin(nspace_items[i].vp);
+ nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+ vnode_unlock(nspace_items[i].vp);
+ }
+
+ nspace_items[i].vp = NULL;
+ nspace_items[i].arg = NULL;
+ nspace_items[i].op = 0;
+ nspace_items[i].vid = 0;
+ nspace_items[i].flags = NSPACE_ITEM_DONE;
+ nspace_items[i].token = 0;
+
+ wakeup((caddr_t)&(nspace_items[i].vp));
+ }
+
+ lck_mtx_unlock(&nspace_handler_lock);
+ }
+ break;
+
+ case FSCTL_NAMESPACE_HANDLER_CANCEL: {
+ uint32_t token, val;
+ int i;
+
+ if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+ break;
+ }
+
+ if (!nspace_is_special_process(p)) {
+ error = EINVAL;
+ break;
+ }
+
+ token = ((uint32_t *)data)[0];
+ val = ((uint32_t *)data)[1];
+
+ lck_mtx_lock(&nspace_handler_lock);
+
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].token == token) {
+ break; /* exit for loop, not case stmt */
+ }
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) {
+ printf("nspace-handler-cancel: did not find token %u\n", token);
+ error = ENOENT;
+ } else {
+ if (nspace_items[i].vp) {
+ vnode_lock_spin(nspace_items[i].vp);
+ nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+ vnode_unlock(nspace_items[i].vp);
+ }
+
+ nspace_items[i].vp = NULL;
+ nspace_items[i].arg = NULL;
+ nspace_items[i].vid = 0;
+ nspace_items[i].token = val;
+ nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
+ nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
+
+ wakeup((caddr_t)&(nspace_items[i].vp));
+ }
+
+ lck_mtx_unlock(&nspace_handler_lock);
+ }
+ break;
+
+ case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ break;
+ }
+
+ // we explicitly do not do the namespace_handler_proc check here
+
+ lck_mtx_lock(&nspace_handler_lock);
+ snapshot_timestamp = ((uint32_t *)data)[0];
+ wakeup(&nspace_item_idx);
+ lck_mtx_unlock(&nspace_handler_lock);
+ printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
+
+ }
+ break;
+
+ case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
+ {
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ break;
+ }
+
+ lck_mtx_lock(&nspace_handler_lock);
+ nspace_allow_virtual_devs = ((uint32_t *)data)[0];
+ lck_mtx_unlock(&nspace_handler_lock);
+ printf("nspace-snapshot-handler will%s allow events on disk-images\n",
+ nspace_allow_virtual_devs ? "" : " NOT");
+ error = 0;
+
+ }
+ break;
+
+ case FSCTL_SET_FSTYPENAME_OVERRIDE:
+ {
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ break;
+ }
+ if (vp->v_mount) {
+ mount_lock(vp->v_mount);
+ if (data[0] != 0) { /* non-empty name: install the override; empty name: clear it */
+ strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN); /* NOTE(review): 'data' holds only 'size' bytes taken from cmd; confirm it is NUL-terminated within that length */
+ vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
+ if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
+ vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
+ vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
+ }
+ } else {
+ if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
+ vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
+ }
+ vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
+ vp->v_mount->fstypename_override[0] = '\0';
+ }
+ mount_unlock(vp->v_mount);
+ }
+ }
+ break;
+
+ default: {
+ /* Invoke the filesystem-specific code */
+ error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
+ }
+
+ } /* end switch stmt */
+
+ /*
+ * if no errors, copy any data to user. Size was
+ * already set and checked above.
+ */
+ if (error == 0 && (cmd & IOC_OUT) && size)
+ error = copyout(data, udata, size);
+
+ if (memp) {
+ kfree(memp, size);
+ }
+
+ return error;
+}
+
+/*
+ * fsctl: path-based filesystem control system call.
+ *
+ * Looks up uap->path (with FOLLOW unless FSOPT_NOFOLLOW is set in
+ * uap->options) and hands the resulting vnode, together with uap->cmd,
+ * uap->data and uap->options, to fsctl_internal().
+ *
+ * Returns:	0	Success
+ *		!0	errno from namei(), the MAC check or fsctl_internal()
+ */
+/* ARGSUSED */
+int
+fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+	vnode_t vp = NULL;
+	struct nameidata nd;
+	u_long lookup_flags;
+	int error;
+
+	AUDIT_ARG(cmd, uap->cmd);
+	AUDIT_ARG(value32, uap->options);
+
+	/* Get the vnode for the file we are getting info on: */
+	lookup_flags = (uap->options & FSOPT_NOFOLLOW) ? 0 : FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_FSCTL, lookup_flags | AUDITVNPATH1,
+	    UIO_USERSPACE, uap->path, ctx);
+	error = namei(&nd);
+	if (error != 0) {
+		goto done;
+	}
+	vp = nd.ni_vp;
+	nameidone(&nd);
+
+#if CONFIG_MACF
+	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
+	if (error != 0) {
+		goto done;
+	}
+#endif
+
+	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
+
+done:
+	/* vp is NULL if the lookup failed or fsctl_internal() already released it */
+	if (vp) {
+		vnode_put(vp);
+	}
+	return error;
+}
+/*
+ * ffsctl: file-descriptor-based filesystem control system call.
+ *
+ * Resolves uap->fd to its vnode, takes an iocount on it and hands it,
+ * together with uap->cmd, uap->data and uap->options, to fsctl_internal().
+ *
+ * Returns:	0	Success
+ *		!0	errno from file_vnode(), vnode_getwithref(), the MAC
+ *			check or fsctl_internal()
+ */
+/* ARGSUSED */
+int
+ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+	vnode_t vp = NULL;
+	int error;
+
+	AUDIT_ARG(fd, uap->fd);
+	AUDIT_ARG(cmd, uap->cmd);
+	AUDIT_ARG(value32, uap->options);
+
+	/* Get the vnode for the file we are getting info on: */
+	error = file_vnode(uap->fd, &vp);
+	if (error != 0) {
+		return error;
+	}
+
+	error = vnode_getwithref(vp);
+	if (error != 0) {
+		file_drop(uap->fd);
+		return error;
+	}
+
+#if CONFIG_MACF
+	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
+	if (error != 0) {
+		file_drop(uap->fd);
+		vnode_put(vp);
+		return error;
+	}
+#endif
+
+	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
+
+	file_drop(uap->fd);
+
+	/* fsctl_internal() can drop the iocount itself and reset vp to NULL */
+	if (vp != NULL) {
+		vnode_put(vp);
+	}
+
+	return error;
+}
+/* end of fsctl system call */
+
+/*
+ * Retrieve the data of an extended attribute.
+ *
+ * Parameters:	p	Calling process (selects the 32/64-bit user space type)
+ *		uap	User arguments: path, attrname, value, size,
+ *			position, options
+ *		retval	When a value buffer was set up: uap->size minus the
+ *			uio residual.  Otherwise: the attribute size
+ *			reported by vn_getxattr() (0 on early failure).
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOSECURITY or XATTR_NODEFAULT set in options
+ *		EPERM	Attribute is protected and the caller is not the
+ *			superuser asking for "com.apple.system.Security"
+ *		!0	errno from namei(), copyinstr() or vn_getxattr()
+ */
+int
+getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
+{
+	vnode_t vp;
+	struct nameidata nd;
+	char attrname[XATTR_MAXNAMELEN+1];
+	vfs_context_t ctx = vfs_context_current();
+	uio_t auio = NULL;
+	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	size_t attrsize = 0;
+	size_t namelen;
+	u_int32_t nameiflags;
+	int error;
+	char uio_buf[ UIO_SIZEOF(1) ];
+
+	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
+		return (EINVAL);
+
+	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
+	if ((error = namei(&nd))) {
+		return (error);
+	}
+	vp = nd.ni_vp;
+	nameidone(&nd);
+
+	/*
+	 * Keep the assignment separate from the test: writing
+	 * "error = copyinstr(...) != 0" stores the result of the comparison
+	 * (0 or 1) and loses the errno that copyinstr() returned.
+	 */
+	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
+	if (error != 0) {
+		goto out;
+	}
+	if (xattr_protected(attrname)) {
+		if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
+			error = EPERM;
+			goto out;
+		}
+	}
+	/*
+	 * the specific check for 0xffffffff is a hack to preserve
+	 * binary compatibility in K64 with applications that discovered
+	 * that passing in a buf pointer and a size of -1 resulted in
+	 * just the size of the indicated extended attribute being returned.
+	 * this isn't part of the documented behavior, but because of the
+	 * original implementation's check for "uap->size > 0", this behavior
+	 * was allowed. In K32 that check turned into a signed comparison
+	 * even though uap->size is unsigned...  in K64, we blow by that
+	 * check because uap->size is unsigned and doesn't get sign smeared
+	 * in the munger for a 32 bit user app.  we also need to add a
+	 * check to limit the maximum size of the buffer being passed in...
+	 * unfortunately, the underlying filesystems seem to just malloc
+	 * the requested size even if the actual extended attribute is tiny.
+	 * because that malloc is for kernel wired memory, we have to put a
+	 * sane limit on it.
+	 *
+	 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
+	 * U64 running on K64 will yield -1 (64 bits wide)
+	 * U32/U64 running on K32 will yield -1 (32 bits wide)
+	 */
+	if (uap->size == 0xffffffff || uap->size == (size_t)-1)
+		goto no_uio;
+
+	if (uap->value) {
+		if (uap->size > (size_t)XATTR_MAXSIZE)
+			uap->size = XATTR_MAXSIZE;
+
+		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
+		    &uio_buf[0], sizeof(uio_buf));
+		uio_addiov(auio, uap->value, uap->size);
+	}
+no_uio:
+	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
+out:
+	vnode_put(vp);
+
+	if (auio) {
+		*retval = uap->size - uio_resid(auio);
+	} else {
+		*retval = (user_ssize_t)attrsize;
+	}
+
+	return (error);
+}
+
+/*
+ * Retrieve the data of an extended attribute (file descriptor variant).
+ *
+ * Parameters:	p	Calling process (selects the 32/64-bit user space type)
+ *		uap	User arguments: fd, attrname, value, size,
+ *			position, options
+ *		retval	When a value buffer was set up: uap->size minus the
+ *			uio residual.  Otherwise: the attribute size
+ *			reported by vn_getxattr() (0 on early failure).
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOFOLLOW, XATTR_NOSECURITY or XATTR_NODEFAULT
+ *			set in options
+ *		EPERM	Attribute is protected
+ *		!0	errno from file_vnode(), vnode_getwithref(),
+ *			copyinstr() or vn_getxattr()
+ */
+int
+fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
+{
+	vnode_t vp;
+	char attrname[XATTR_MAXNAMELEN+1];
+	uio_t auio = NULL;
+	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	size_t attrsize = 0;
+	size_t namelen;
+	int error;
+	char uio_buf[ UIO_SIZEOF(1) ];
+
+	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
+		return (EINVAL);
+
+	if ( (error = file_vnode(uap->fd, &vp)) ) {
+		return (error);
+	}
+	if ( (error = vnode_getwithref(vp)) ) {
+		file_drop(uap->fd);
+		return(error);
+	}
+	/*
+	 * Keep the assignment separate from the test: writing
+	 * "error = copyinstr(...) != 0" stores the result of the comparison
+	 * (0 or 1) and loses the errno that copyinstr() returned.
+	 */
+	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
+	if (error != 0) {
+		goto out;
+	}
+	if (xattr_protected(attrname)) {
+		error = EPERM;
+		goto out;
+	}
+	/* Without a usable buffer auio stays NULL and only the size is reported. */
+	if (uap->value && uap->size > 0) {
+		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
+		    &uio_buf[0], sizeof(uio_buf));
+		uio_addiov(auio, uap->value, uap->size);
+	}
+
+	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
+out:
+	(void)vnode_put(vp);
+	file_drop(uap->fd);
+
+	if (auio) {
+		*retval = uap->size - uio_resid(auio);
+	} else {
+		*retval = (user_ssize_t)attrsize;
+	}
+	return (error);
+}
+
+/*
+ * Set the data of an extended attribute.
+ *
+ * Parameters:	p	Calling process (selects the 32/64-bit user space type)
+ *		uap	User arguments: path, attrname, value, size,
+ *			position, options
+ *		retval	Always set to 0 once the path lookup has succeeded
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOSECURITY or XATTR_NODEFAULT set in options,
+ *			or a non-zero size with a NULL value pointer
+ *		EPERM	Attribute is protected
+ *		!0	errno from copyinstr(), namei() or vn_setxattr()
+ */
+int
+setxattr(proc_t p, struct setxattr_args *uap, int *retval)
+{
+	vnode_t vp;
+	struct nameidata nd;
+	char attrname[XATTR_MAXNAMELEN+1];
+	vfs_context_t ctx = vfs_context_current();
+	uio_t auio = NULL;
+	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	size_t namelen;
+	u_int32_t nameiflags;
+	int error;
+	char uio_buf[ UIO_SIZEOF(1) ];
+
+	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
+		return (EINVAL);
+
+	/*
+	 * Keep the assignment separate from the test: writing
+	 * "error = copyinstr(...) != 0" stores the result of the comparison
+	 * (0 or 1) and loses the errno that copyinstr() returned, so every
+	 * failure used to look like errno 1 (EPERM) below.
+	 */
+	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
+	if (error != 0) {
+		if (error == EPERM) {
+			/*
+			 * Compatibility: a literal EPERM at this point has
+			 * always been reported to the caller as ENAMETOOLONG.
+			 */
+			return (ENAMETOOLONG);
+		}
+		/* Otherwise return the error from copyinstr unchanged */
+		return error;
+	}
+	if (xattr_protected(attrname))
+		return(EPERM);
+	if (uap->size != 0 && uap->value == 0) {
+		return (EINVAL);
+	}
+
+	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
+	if ((error = namei(&nd))) {
+		return (error);
+	}
+	vp = nd.ni_vp;
+	nameidone(&nd);
+
+	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
+	    &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, uap->value, uap->size);
+
+	error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
+#if CONFIG_FSE
+	/* Post an fsevent only when the attribute was actually modified. */
+	if (error == 0) {
+		add_fsevent(FSE_XATTR_MODIFIED, ctx,
+		    FSE_ARG_VNODE, vp,
+		    FSE_ARG_DONE);
+	}
+#endif
+	vnode_put(vp);
+	*retval = 0;
+	return (error);
+}
+
+/*
+ * Set the data of an extended attribute (file descriptor variant).
+ *
+ * Parameters:	p	Calling process (selects the 32/64-bit user space type)
+ *		uap	User arguments: fd, attrname, value, size,
+ *			position, options
+ *		retval	Always set to 0 once the vnode has been obtained
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOFOLLOW, XATTR_NOSECURITY or XATTR_NODEFAULT
+ *			set in options, or a non-zero size with a NULL
+ *			value pointer
+ *		EPERM	Attribute is protected
+ *		!0	errno from copyinstr(), file_vnode(),
+ *			vnode_getwithref() or vn_setxattr()
+ */
+int
+fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
+{
+	vnode_t vp;
+	char attrname[XATTR_MAXNAMELEN+1];
+	uio_t auio = NULL;
+	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	size_t namelen;
+	int error;
+	char uio_buf[ UIO_SIZEOF(1) ];
+#if CONFIG_FSE
+	vfs_context_t ctx = vfs_context_current();
+#endif
+
+	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
+		return (EINVAL);
+
+	/*
+	 * Keep the assignment separate from the test: writing
+	 * "error = copyinstr(...) != 0" stores the result of the comparison
+	 * (0 or 1) and loses the errno that copyinstr() returned, so every
+	 * failure used to look like errno 1 (EPERM) below.
+	 */
+	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
+	if (error != 0) {
+		if (error == EPERM) {
+			/*
+			 * Compatibility: a literal EPERM at this point has
+			 * always been reported to the caller as ENAMETOOLONG.
+			 */
+			return (ENAMETOOLONG);
+		}
+		/* Otherwise return the error from copyinstr unchanged */
+		return error;
+	}
+	if (xattr_protected(attrname))
+		return(EPERM);
+	if (uap->size != 0 && uap->value == 0) {
+		return (EINVAL);
+	}
+	if ( (error = file_vnode(uap->fd, &vp)) ) {
+		return (error);
+	}
+	if ( (error = vnode_getwithref(vp)) ) {
+		file_drop(uap->fd);
+		return(error);
+	}
+	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
+	    &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, uap->value, uap->size);
+
+	error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
+#if CONFIG_FSE
+	/* Post an fsevent only when the attribute was actually modified. */
+	if (error == 0) {
+		add_fsevent(FSE_XATTR_MODIFIED, ctx,
+		    FSE_ARG_VNODE, vp,
+		    FSE_ARG_DONE);
+	}
+#endif
+	vnode_put(vp);
+	file_drop(uap->fd);
+	*retval = 0;
+	return (error);
+}
+
+/*
+ * Remove an extended attribute, locating the file by path.
+ * XXX Code duplication here.
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOSECURITY or XATTR_NODEFAULT set in options
+ *		EPERM	Attribute is protected
+ *		!0	errno from copyinstr(), namei() or vn_removexattr()
+ */
+int
+removexattr(proc_t p, struct removexattr_args *uap, int *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	char attrname[XATTR_MAXNAMELEN+1];
+	struct nameidata nd;
+	u_int32_t lookup_flags;
+	size_t copied;
+	vnode_t vp;
+	int error;
+
+	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
+		return (EINVAL);
+	}
+
+	/* Fetch and vet the attribute name before touching the filesystem. */
+	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &copied);
+	if (error != 0) {
+		return (error);
+	}
+	if (xattr_protected(attrname)) {
+		return(EPERM);
+	}
+
+	lookup_flags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, lookup_flags, spacetype, uap->path, ctx);
+	error = namei(&nd);
+	if (error != 0) {
+		return (error);
+	}
+	vp = nd.ni_vp;
+	nameidone(&nd);
+
+	error = vn_removexattr(vp, attrname, uap->options, ctx);
+#if CONFIG_FSE
+	/* Post an fsevent only when the attribute was actually removed. */
+	if (error == 0) {
+		add_fsevent(FSE_XATTR_REMOVED, ctx,
+		    FSE_ARG_VNODE, vp,
+		    FSE_ARG_DONE);
+	}
+#endif
+	vnode_put(vp);
+	*retval = 0;
+	return (error);
+}
+
+/*
+ * Remove an extended attribute, locating the file by descriptor.
+ * XXX Code duplication here.
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOFOLLOW, XATTR_NOSECURITY or XATTR_NODEFAULT
+ *			set in options
+ *		EPERM	Attribute is protected
+ *		!0	errno from copyinstr(), file_vnode(),
+ *			vnode_getwithref() or vn_removexattr()
+ */
+int
+fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
+{
+	char attrname[XATTR_MAXNAMELEN+1];
+	size_t copied;
+	vnode_t vp;
+	int error;
+#if CONFIG_FSE
+	vfs_context_t ctx = vfs_context_current();
+#endif
+
+	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
+		return (EINVAL);
+	}
+
+	/* Fetch and vet the attribute name before taking any references. */
+	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &copied);
+	if (error != 0) {
+		return (error);
+	}
+	if (xattr_protected(attrname)) {
+		return(EPERM);
+	}
+
+	error = file_vnode(uap->fd, &vp);
+	if (error != 0) {
+		return (error);
+	}
+	error = vnode_getwithref(vp);
+	if (error != 0) {
+		file_drop(uap->fd);
+		return(error);
+	}
+
+	error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
+#if CONFIG_FSE
+	/* Post an fsevent only when the attribute was actually removed. */
+	if (error == 0) {
+		add_fsevent(FSE_XATTR_REMOVED, ctx,
+		    FSE_ARG_VNODE, vp,
+		    FSE_ARG_DONE);
+	}
+#endif
+	vnode_put(vp);
+	file_drop(uap->fd);
+	*retval = 0;
+	return (error);
+}
+
+/*
+ * Retrieve the list of extended attribute names, locating the file by path.
+ * XXX Code duplication here.
+ *
+ * retval receives uap->bufsize minus the uio residual when a name buffer
+ * was supplied, otherwise the size reported by vn_listxattr().
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOSECURITY or XATTR_NODEFAULT set in options
+ *		!0	errno from namei() or vn_listxattr()
+ */
+int
+listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	char uio_storage[ UIO_SIZEOF(1) ];
+	struct nameidata nd;
+	u_int32_t lookup_flags;
+	size_t listsize = 0;
+	uio_t auio = NULL;
+	vnode_t vp;
+	int error;
+
+	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
+		return (EINVAL);
+	}
+
+	lookup_flags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_LISTXATTR, lookup_flags, spacetype, uap->path, ctx);
+	error = namei(&nd);
+	if (error != 0) {
+		return (error);
+	}
+	vp = nd.ni_vp;
+	nameidone(&nd);
+
+	/* Without a usable buffer auio stays NULL and only the size is reported. */
+	if (uap->namebuf != 0 && uap->bufsize > 0) {
+		auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
+		    &uio_storage[0], sizeof(uio_storage));
+		uio_addiov(auio, uap->namebuf, uap->bufsize);
+	}
+
+	error = vn_listxattr(vp, auio, &listsize, uap->options, ctx);
+
+	vnode_put(vp);
+
+	*retval = auio ? ((user_ssize_t)uap->bufsize - uio_resid(auio))
+	    : (user_ssize_t)listsize;
+	return (error);
+}
+
+/*
+ * Retrieve the list of extended attribute names, locating the file by
+ * descriptor.
+ * XXX Code duplication here.
+ *
+ * retval receives uap->bufsize minus the uio residual when a name buffer
+ * was supplied, otherwise the size reported by vn_listxattr().
+ *
+ * Returns:	0	Success
+ *		EINVAL	XATTR_NOFOLLOW, XATTR_NOSECURITY or XATTR_NODEFAULT
+ *			set in options
+ *		!0	errno from file_vnode(), vnode_getwithref() or
+ *			vn_listxattr()
+ */
+int
+flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
+{
+	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+	char uio_storage[ UIO_SIZEOF(1) ];
+	size_t listsize = 0;
+	uio_t auio = NULL;
+	vnode_t vp;
+	int error;
+
+	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
+		return (EINVAL);
+	}
+
+	error = file_vnode(uap->fd, &vp);
+	if (error != 0) {
+		return (error);
+	}
+	error = vnode_getwithref(vp);
+	if (error != 0) {
+		file_drop(uap->fd);
+		return(error);
+	}
+
+	/* Without a usable buffer auio stays NULL and only the size is reported. */
+	if (uap->namebuf != 0 && uap->bufsize > 0) {
+		auio = uio_createwithbuffer(1, 0, spacetype,
+		    UIO_READ, &uio_storage[0], sizeof(uio_storage));
+		uio_addiov(auio, uap->namebuf, uap->bufsize);
+	}
+
+	error = vn_listxattr(vp, auio, &listsize, uap->options, vfs_context_current());
+
+	vnode_put(vp);
+	file_drop(uap->fd);
+
+	*retval = auio ? ((user_ssize_t)uap->bufsize - uio_resid(auio))
+	    : (user_ssize_t)listsize;
+	return (error);
+}
+
+/*
+ * fsgetpath_internal: build the absolute path of object 'objid' on the
+ * volume identified by 'volfs_id'.
+ *
+ * Parameters:	ctx		VFS context of the caller
+ *		volfs_id	Volume id handed to mount_lookupby_volfsid()
+ *		objid		File id; the value 2 selects the volume root
+ *		bufsize		Size of 'buf' in bytes (at most PAGE_SIZE)
+ *		buf		Buffer that receives the path
+ *		pathlen		Receives the length reported by build_path()
+ *				(written only on success)
+ *
+ * Returns:	0		Success
+ *		EINVAL		bufsize is larger than PAGE_SIZE
+ *		ENOMEM		buf is NULL
+ *		ENOTSUP		No mount found for volfs_id
+ *		ENOENT		build_path() kept returning EAGAIN until the
+ *				retry budget ran out
+ *		!0		errno from VFS_ROOT()/VFS_VGET(), the MAC
+ *				check or build_path()
+ */
+static int fsgetpath_internal(
+	vfs_context_t ctx, int volfs_id, uint64_t objid,
+	vm_size_t bufsize, caddr_t buf, int *pathlen)
+{
+	struct mount *mp;
+	vnode_t vp;
+	int error;
+	int length;
+	int bpflags;
+	/* maximum number of times to retry build_path */
+	unsigned int tries_left = 0x10;
+
+	if (bufsize > PAGE_SIZE) {
+		return (EINVAL);
+	}
+	if (buf == NULL) {
+		return (ENOMEM);
+	}
+
+retry:
+	mp = mount_lookupby_volfsid(volfs_id, 1);
+	if (mp == NULL) {
+		return (ENOTSUP);	/* unexpected failure */
+	}
+
+unionget:
+	error = (objid == 2)
+	    ? VFS_ROOT(mp, &vp, ctx)
+	    : VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
+
+	if (error != ENOENT || (mp->mnt_flag & MNT_UNION) == 0) {
+		vfs_unbusy(mp);
+	} else {
+		/*
+		 * If the fileid isn't found and we're in a union
+		 * mount volume, then see if the fileid is in the
+		 * mounted-on volume.
+		 */
+		struct mount *upper = mp;
+
+		mp = vnode_mount(upper->mnt_vnodecovered);
+		vfs_unbusy(upper);
+		if (vfs_busy(mp, LK_NOWAIT) == 0) {
+			goto unionget;
+		}
+		/* could not busy the covered mount: fall out with ENOENT */
+	}
+
+	if (error) {
+		return error;
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_fsgetpath(ctx, vp);
+	if (error) {
+		vnode_put(vp);
+		return error;
+	}
+#endif
+
+	/* Obtain the absolute path to this vnode. */
+	bpflags = BUILDPATH_CHECK_MOVED;
+	if (vfs_context_suser(ctx)) {
+		bpflags |= BUILDPATH_CHECKACCESS;
+	}
+	error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
+	vnode_put(vp);
+
+	if (error == EAGAIN) {
+		/* there was a race building the path, try a few more times */
+		if (--tries_left > 0) {
+			goto retry;
+		}
+		return (ENOENT);
+	}
+	if (error) {
+		return (error);
+	}
+
+	AUDIT_ARG(text, buf);
+
+	if (kdebug_enable) {
+		long dbg_parms[NUMPARMS];
+		int dbg_len = (int)sizeof(dbg_parms);
+
+		if (length < dbg_len) {
+			/* Whole path fits: copy it and zero the remainder. */
+			memcpy((char *)dbg_parms, buf, length);
+			memset((char *)dbg_parms + length, 0, dbg_len - length);
+
+			dbg_len = length;
+		} else {
+			/* Path too long: keep only its last dbg_len bytes. */
+			memcpy((char *)dbg_parms, buf + (length - dbg_len), dbg_len);
+		}
+
+		kdebug_lookup_gen_events(dbg_parms, dbg_len, (void *)vp, TRUE);
+	}
+
+	*pathlen = length;
+
+	return (error);
+}
+
+/*
+ * Obtain the full pathname of a file system object by id.
+ *
+ * This is a private SPI used by the File Manager.
+ */
+__private_extern__
+int
+fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ fsid_t fsid;
+ char *realpath;
+ int length;
+ int error;