+ }
+
+#if CONFIG_MACF
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_type, fvp->v_type);
+ /* Mask off all but regular access permissions */
+ VATTR_SET(&va, va_mode,
+ ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
+ error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
+ if (error)
+ goto out;
+#endif /* CONFIG_MACF */
+
+ if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out;
+
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number) then there is nothing to do.
+ * (fixed to have POSIX semantics - CSM 3/2/98)
+ */
+ if (fvp == tvp)
+ error = -1;
+ if (!error)
+ error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
+out:
+ sdvp = tond.ni_startdir;
+ /*
+ * nameidone has to happen before we vnode_put(tdvp)
+ * since it may need to release the fs_nodelock on the tdvp
+ */
+ nameidone(&tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ vnode_put(sdvp);
+out1:
+ vnode_put(fvp);
+
+ nameidone(&fromnd);
+
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
+
+/*
+ * Helper function for doing clones. The caller is expected to provide an
+ * iocounted source vnode and release it.
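+ * data_read_authorised indicates that the caller has already
+ * established read access to the source (fclonefileat below passes
+ * TRUE after its FREAD check; clonefileat passes FALSE).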
+ */
+static int
+clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
+ user_addr_t dst, uint32_t flags, vfs_context_t ctx)
+{
+ vnode_t tvp, tdvp;
+ struct nameidata tond;
+ int error;
+ int follow;
+ boolean_t free_src_acl;
+ boolean_t attr_cleanup;
+ enum vtype v_type;
+ kauth_action_t action;
+ struct componentname *cnp;
+ uint32_t defaulted;
+ struct vnode_attr va;
+ struct vnode_attr nva;
+ uint32_t vnop_flags;
+
+ v_type = vnode_vtype(fvp);
+ switch (v_type) {
+ case VLNK:
+ /* FALLTHRU */
+ case VREG:
+ action = KAUTH_VNODE_ADD_FILE;
+ break;
+ case VDIR:
+ if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
+ fvp->v_mountedhere) {
+ return (EINVAL);
+ }
+ action = KAUTH_VNODE_ADD_SUBDIRECTORY;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ AUDIT_ARG(fd2, dst_dirfd);
+ AUDIT_ARG(value32, flags);
+
+ follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
+ UIO_USERSPACE, dst, ctx);
+ if ((error = nameiat(&tond, dst_dirfd)))
+ return (error);
+ cnp = &tond.ni_cnd;
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+
+ free_src_acl = FALSE;
+ attr_cleanup = FALSE;
+
+ if (tvp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ if (vnode_mount(tdvp) != vnode_mount(fvp)) {
+ error = EXDEV;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
+ goto out;
+#endif
+ if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
+ goto out;
+
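+ /* Skip re-checking READ_DATA when the caller already holds read access. */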
+ action = KAUTH_VNODE_GENERIC_READ_BITS;
+ if (data_read_authorised)
+ action &= ~KAUTH_VNODE_READ_DATA;
+ if ((error = vnode_authorize(fvp, NULL, action, ctx)))
+ goto out;
+
+ /*
+ * Certain attributes may need to be carried over from the source;
+ * we ask for those here.
+ */
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ VATTR_WANTED(&va, va_gid);
+ VATTR_WANTED(&va, va_mode);
+ VATTR_WANTED(&va, va_flags);
+ VATTR_WANTED(&va, va_acl);
+
+ if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
+ goto out;
+
+ VATTR_INIT(&nva);
+ VATTR_SET(&nva, va_type, v_type);
+ if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
+ VATTR_SET(&nva, va_acl, va.va_acl);
+ free_src_acl = TRUE;
+ }
+
+ /* Handle ACL inheritance, initialize vap. */
+ if (v_type == VLNK) {
+ error = vnode_authattr_new(tdvp, &nva, 0, ctx);
+ } else {
+ error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
+ if (error)
+ goto out;
+ attr_cleanup = TRUE;
+ }
+
+ vnop_flags = VNODE_CLONEFILE_DEFAULT;
+ /*
+ * We've got initial values for all security parameters.
+ * If we are superuser, then we can change owners to be the
+ * same as the source. Both superuser and the owner have default
+ * WRITE_SECURITY privileges so all other fields can be taken
+ * from source as well.
+ */
+ if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
+ if (VATTR_IS_SUPPORTED(&va, va_uid))
+ VATTR_SET(&nva, va_uid, va.va_uid);
+ if (VATTR_IS_SUPPORTED(&va, va_gid))
+ VATTR_SET(&nva, va_gid, va.va_gid);
+ } else {
+ vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
+ }
+
+ if (VATTR_IS_SUPPORTED(&va, va_mode))
+ VATTR_SET(&nva, va_mode, va.va_mode);
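+ /*
+ * Keep the UF_DATAVAULT / SF_RESTRICTED flags chosen for the new node
+ * and copy all other BSD flags from the source.
+ */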
+ if (VATTR_IS_SUPPORTED(&va, va_flags)) {
+ VATTR_SET(&nva, va_flags,
+ ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
+ (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
+ }
+
+ error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
+
+ if (!error && tvp) {
+ int update_flags = 0;
+#if CONFIG_FSE
+ int fsevent;
+#endif /* CONFIG_FSE */
+
+#if CONFIG_MACF
+ (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
+ VNODE_LABEL_CREATE, ctx);
+#endif
+ /*
+ * If some of the requested attributes weren't handled by the
+ * VNOP, use our fallback code.
+ */
+ if (!VATTR_ALL_SUPPORTED(&nva))
+ (void)vnode_setattr_fallback(tvp, &nva, ctx);
+
+ // Make sure the name & parent pointers are hooked up
+ if (tvp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (tvp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags) {
+ (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
+ cnp->cn_namelen, cnp->cn_hash, update_flags);
+ }
+
+#if CONFIG_FSE
+ switch (vnode_vtype(tvp)) {
+ case VLNK:
+ /* FALLTHRU */
+ case VREG:
+ fsevent = FSE_CREATE_FILE;
+ break;
+ case VDIR:
+ fsevent = FSE_CREATE_DIR;
+ break;
+ default:
+ goto out;
+ }
+
+ if (need_fsevent(fsevent, tvp)) {
+ /*
+ * The following is a sequence of three explicit events.
+ * A pair of FSE_CLONE events representing the source and destination
+ * followed by an FSE_CREATE_[FILE | DIR] for the destination.
+ * fseventsd may coalesce the destination clone and create events
+ * into a single event resulting in the following sequence for a client
+ * FSE_CLONE (src)
+ * FSE_CLONE | FSE_CREATE (dst)
+ */
+ add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
+ FSE_ARG_DONE);
+ add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
+ FSE_ARG_DONE);
+ }
+#endif /* CONFIG_FSE */
+ }
+
+out:
+ if (attr_cleanup)
+ vn_attribute_cleanup(&nva, defaulted);
+ if (free_src_acl && va.va_acl)
+ kauth_acl_free(va.va_acl);
+ nameidone(&tond);
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ return (error);
+}
+
+/*
+ * clone files or directories, target must not exist.
+ */
+/* ARGSUSED */
+int
+clonefileat(__unused proc_t p, struct clonefileat_args *uap,
+ __unused int32_t *retval)
+{
+ vnode_t fvp;
+ struct nameidata fromnd;
+ int follow;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ /* Check that the flags are valid. */
+ if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
+ return (EINVAL);
+
+ AUDIT_ARG(fd, uap->src_dirfd);
+
+ follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
+ UIO_USERSPACE, uap->src, ctx);
+ if ((error = nameiat(&fromnd, uap->src_dirfd)))
+ return (error);
+
+ fvp = fromnd.ni_vp;
+ nameidone(&fromnd);
+
+ error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
+ uap->flags, ctx);
+
+ vnode_put(fvp);
+ return (error);
+}
+
+int
+fclonefileat(proc_t p, struct fclonefileat_args *uap,
+ __unused int32_t *retval)
+{
+ vnode_t fvp;
+ struct fileproc *fp;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ /* Check that the flags are valid. */
+ if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
+ return (EINVAL);
+
+ AUDIT_ARG(fd, uap->src_fd);
+ error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
+ if (error)
+ return (error);
+
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+ if ((error = vnode_getwithref(fvp)))
+ goto out;
+
+ AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
+
+ error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
+ uap->flags, ctx);
+
+ vnode_put(fvp);
+out:
+ file_drop(uap->src_fd);
+ return (error);
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ */
+/* ARGSUSED */
+static int
+renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
+ int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
+{
+ if (flags & ~VFS_RENAME_FLAGS_MASK)
+ return EINVAL;
+
+ if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
+ return EINVAL;
+
+ vnode_t tvp, tdvp;
+ vnode_t fvp, fdvp;
+ struct nameidata *fromnd, *tond;
+ int error;
+ int do_retry;
+ int retry_count;
+ int mntrename;
+ int need_event;
+ const char *oname = NULL;
+ char *from_name = NULL, *to_name = NULL;
+ int from_len=0, to_len=0;
+ int holding_mntlock;
+ mount_t locked_mp = NULL;
+ vnode_t oparent = NULLVP;
+#if CONFIG_FSE
+ fse_info from_finfo, to_finfo;
+#endif
+ int from_truncated=0, to_truncated;
+ int batched = 0;
+ struct vnode_attr *fvap, *tvap;
+ int continuing = 0;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata from_node, to_node;
+ struct vnode_attr fv_attr, tv_attr;
+ } * __rename_data;
+ MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ fromnd = &__rename_data->from_node;
+ tond = &__rename_data->to_node;
+
+ holding_mntlock = 0;
+ do_retry = 0;
+ retry_count = 0;
+retry:
+ fvp = tvp = NULL;
+ fdvp = tdvp = NULL;
+ fvap = tvap = NULL;
+ mntrename = FALSE;
+
+ NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
+ segflg, from, ctx);
+ fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
+
+ NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+ segflg, to, ctx);
+ tond->ni_flag = NAMEI_COMPOUNDRENAME;
+
+continue_lookup:
+ if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+ if ( (error = nameiat(fromnd, fromfd)) )
+ goto out1;
+ fdvp = fromnd->ni_dvp;
+ fvp = fromnd->ni_vp;
+
+ if (fvp && fvp->v_type == VDIR)
+ tond->ni_cnd.cn_flags |= WILLBEDIR;
+ }
+
+ if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+ if ( (error = nameiat(tond, tofd)) ) {
+ /*
+ * Translate error code for rename("dir1", "dir2/.").
+ */
+ if (error == EISDIR && fvp->v_type == VDIR)
+ error = EINVAL;
+ goto out1;
+ }
+ tdvp = tond->ni_dvp;
+ tvp = tond->ni_vp;
+ }
+
+#if DEVELOPMENT || DEBUG
+ /*
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out1;
+ }
+
+ if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out1;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+ if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
+ error = ENOENT;
+ goto out1;
+ }
+
+ if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
+ error = EEXIST;
+ goto out1;
+ }
+
+ batched = vnode_compound_rename_available(fdvp);
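+ /*
+ * If a compound rename VNOP is available, the remaining lookup and
+ * authorization work may be deferred to the file system; fvp/tvp can
+ * still be NULL at this point in that case.
+ */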
+
+#if CONFIG_FSE
+ need_event = need_fsevent(FSE_RENAME, fdvp);
+ if (need_event) {
+ if (fvp) {
+ get_fse_info(fvp, &from_finfo, ctx);
+ } else {
+ error = vfs_get_notify_attributes(&__rename_data->fv_attr);
+ if (error) {
+ goto out1;
+ }
+
+ fvap = &__rename_data->fv_attr;
+ }
+
+ if (tvp) {
+ get_fse_info(tvp, &to_finfo, ctx);
+ } else if (batched) {
+ error = vfs_get_notify_attributes(&__rename_data->tv_attr);
+ if (error) {
+ goto out1;
+ }
+
+ tvap = &__rename_data->tv_attr;
+ }
+ }
+#else
+ need_event = 0;
+#endif /* CONFIG_FSE */
+
+ if (need_event || kauth_authorize_fileop_has_listeners()) {
+ if (from_name == NULL) {
+ GET_PATH(from_name);
+ if (from_name == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
+
+ if (to_name == NULL) {
+ GET_PATH(to_name);
+ if (to_name == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
+ }
+ if (!fvp) {
+ /*
+ * Claim: this check will never reject a valid rename.
+ * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
+ * Suppose fdvp and tdvp are not on the same mount.
+ * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
+ * then you can't move it to within another dir on the same mountpoint.
+ * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
+ *
+ * If this check passes, then we are safe to pass these vnodes to the same FS.
+ */
+ if (fdvp->v_mount != tdvp->v_mount) {
+ error = EXDEV;
+ goto out1;
+ }
+ goto skipped_lookup;
+ }
+
+ if (!batched) {
+ error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
+ if (error) {
+ if (error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * We encountered a race where after doing the namei, tvp stops
+ * being valid. If so, simply re-drive the rename call from the
+ * top.
+ */
+ do_retry = 1;
+ retry_count += 1;
+ }
+ }
+ goto out1;
+ }
+ }
+
+ /*
+ * If the source and destination are the same (i.e. they're
+ * links to the same vnode) and the target file system is
+ * case sensitive, then there is nothing to do.
+ *
+ * XXX Come back to this.
+ */
+ if (fvp == tvp) {
+ int pathconf_val;
+
+ /*
+ * Note: if _PC_CASE_SENSITIVE selector isn't supported,
+ * then assume that this file system is case sensitive.
+ */
+ if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
+ pathconf_val != 0) {
+ goto out1;
+ }
+ }
+
+ /*
+ * Allow the renaming of mount points.
+ * - target must not exist
+ * - target must reside in the same directory as source
+ * - union mounts cannot be renamed
+ * - "/" cannot be renamed
+ *
+ * XXX Handle this in VFS after a continued lookup (if we missed
+ * in the cache to start off)
+ *
+ * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
+ * we'll skip past here. The file system is responsible for
+ * checking that @tvp is not a descendant of @fvp and vice versa
+ * so it should always return EINVAL if either @tvp or @fvp is the
+ * root of a volume.
+ */
+ if ((fvp->v_flag & VROOT) &&
+ (fvp->v_type == VDIR) &&
+ (tvp == NULL) &&
+ (fvp->v_mountedhere == NULL) &&
+ (fdvp == tdvp) &&
+ ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
+ (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
+ vnode_t coveredvp;
+
+ /* switch fvp to the covered vnode */
+ coveredvp = fvp->v_mount->mnt_vnodecovered;
+ if ( (vnode_getwithref(coveredvp)) ) {
+ error = ENOENT;
+ goto out1;
+ }
+ vnode_put(fvp);
+
+ fvp = coveredvp;
+ mntrename = TRUE;
+ }
+ /*
+ * Check for cross-device rename.
+ */
+ if ((fvp->v_mount != tdvp->v_mount) ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ goto out1;
+ }
+
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number) then there is nothing to do...
+ * EXCEPT if the underlying file system supports case
+ * insensitivity and is case preserving. In this case
+ * the file system needs to handle the special case of
+ * getting the same vnode as source (fvp) and target (tvp).
+ *
+ * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
+ * and _PC_CASE_PRESERVING can have this exception, and they need to
+ * handle the special case of getting the same vnode as target and
+ * source. NOTE: Then the target is unlocked going into vnop_rename,
+ * so as not to cause locking problems. There is a single reference on tvp.
+ *
+ * NOTE - that fvp == tvp also occurs if they are hard linked and
+ * that correct behaviour then is just to return success without doing
+ * anything.
+ *
+ * XXX filesystem should take care of this itself, perhaps...
+ */
+ if (fvp == tvp && fdvp == tdvp) {
+ if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
+ !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
+ fromnd->ni_cnd.cn_namelen)) {
+ goto out1;
+ }
+ }
+
+ if (holding_mntlock && fvp->v_mount != locked_mp) {
+ /*
+ * we're holding a reference and lock
+ * on locked_mp, but it no longer matches
+ * what we want to do... so drop our hold
+ */
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (tdvp != fdvp && fvp->v_type == VDIR) {
+ /*
+ * serialize renames that re-shape
+ * the tree... if holding_mntlock is
+ * set, then we're ready to go...
+ * otherwise we
+ * first need to drop the iocounts
+ * we picked up, second take the
+ * lock to serialize the access,
+ * then finally start the lookup
+ * process over with the lock held
+ */
+ if (!holding_mntlock) {
+ /*
+ * need to grab a reference on
+ * the mount point before we
+ * drop all the iocounts... once
+ * the iocounts are gone, the mount
+ * could follow
+ */
+ locked_mp = fvp->v_mount;
+ mount_ref(locked_mp, 0);
+
+ /*
+ * nameidone has to happen before we vnode_put(tvp)
+ * since it may need to release the fs_nodelock on the tvp
+ */
+ nameidone(tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+
+ /*
+ * nameidone has to happen before we vnode_put(fdvp)
+ * since it may need to release the fs_nodelock on the fvp
+ */
+ nameidone(fromnd);
+
+ vnode_put(fvp);
+ vnode_put(fdvp);
+
+ mount_lock_renames(locked_mp);
+ holding_mntlock = 1;
+
+ goto retry;
+ }
+ } else {
+ /*
+ * when we dropped the iocounts to take
+ * the lock, we allowed the identity of
+ * the various vnodes to change... if they did,
+ * we may no longer be dealing with a rename
+ * that reshapes the tree... once we're holding
+ * the iocounts, the vnodes can't change type
+ * so we're free to drop the lock at this point
+ * and continue on
+ */
+ if (holding_mntlock) {
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ }
+
+ // save these off so we can later verify that fvp is the same
+ oname = fvp->v_name;
+ oparent = fvp->v_parent;
+
+skipped_lookup:
+ error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
+ tdvp, &tvp, &tond->ni_cnd, tvap,
+ flags, ctx);
+
+ if (holding_mntlock) {
+ /*
+ * we can drop our serialization
+ * lock now
+ */
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (error) {
+ if (error == EKEEPLOOKING) {
+ if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
+ }
+ }
+
+ fromnd->ni_vp = fvp;
+ tond->ni_vp = tvp;
+
+ goto continue_lookup;
+ }
+
+ /*
+ * We may encounter a race in the VNOP where the destination didn't
+ * exist when we did the namei, but it does by the time we go and
+ * try to create the entry. In this case, we should re-drive this rename
+ * call from the top again. Currently, only HFS bubbles out ERECYCLE,
+ * but other filesystems susceptible to this race could return it, too.
+ */
+ if (error == ERECYCLE) {
+ do_retry = 1;
+ }
+
+ /*
+ * For compound VNOPs, the authorization callback may return
+ * ENOENT in case of racing hardlink lookups hitting the name
+ * cache; redrive the lookup.
+ */
+ if (batched && error == ENOENT) {
+ assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ do_retry = 1;
+ retry_count += 1;
+ }
+ }
+
+ goto out1;
+ }
+
+ /* call out to allow 3rd party notification of rename.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_RENAME,
+ (uintptr_t)from_name, (uintptr_t)to_name);
+ if (flags & VFS_RENAME_SWAP) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_RENAME,
+ (uintptr_t)to_name, (uintptr_t)from_name);
+ }
+
+#if CONFIG_FSE
+ if (from_name != NULL && to_name != NULL) {
+ if (from_truncated || to_truncated) {
+ // set it here since only the from_finfo gets reported up to user space
+ from_finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+
+ if (tvap && tvp) {
+ vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
+ }
+ if (fvap) {
+ vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
+ }
+
+ if (tvp) {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_DONE);
+ if (flags & VFS_RENAME_SWAP) {
+ /*
+ * Strictly speaking, swap is the equivalent of
+ * *three* renames. FSEvents clients should only take
+ * the events as a hint, so we only bother reporting
+ * two.
+ */
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_DONE);
+ }
+ } else {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_DONE);
+ }
+ }
+#endif /* CONFIG_FSE */
+
+ /*
+ * update filesystem's mount point data
+ */
+ if (mntrename) {
+ char *cp, *pathend, *mpname;
+ char * tobuf;
+ struct mount *mp;
+ int maxlen;
+ size_t len = 0;
+
+ mp = fvp->v_mountedhere;
+
+ if (vfs_busy(mp, LK_NOWAIT)) {
+ error = EBUSY;
+ goto out1;
+ }
+ MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+
+ if (UIO_SEG_IS_USER_SPACE(segflg))
+ error = copyinstr(to, tobuf, MAXPATHLEN, &len);
+ else
+ error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
+ if (!error) {
+ /* find current mount point prefix */
+ pathend = &mp->mnt_vfsstat.f_mntonname[0];
+ for (cp = pathend; *cp != '\0'; ++cp) {
+ if (*cp == '/')
+ pathend = cp + 1;
+ }
+ /* find last component of target name */
+ for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
+ if (*cp == '/')
+ mpname = cp + 1;
+ }
+ /* append name to prefix */
+ maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
+ bzero(pathend, maxlen);
+ strlcpy(pathend, mpname, maxlen);
+ }
+ FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
+
+ vfs_unbusy(mp);
+ }
+ /*
+ * fix up name & parent pointers. note that we first
+ * check that fvp has the same name/parent pointers it
+ * had before the rename call... this is a 'weak' check
+ * at best...
+ *
+ * XXX oparent and oname may not be set in the compound vnop case
+ */
+ if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
+ int update_flags;
+
+ update_flags = VNODE_UPDATE_NAME;
+
+ if (fdvp != tdvp)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
+ }
+out1:
+ if (to_name != NULL) {
+ RELEASE_PATH(to_name);
+ to_name = NULL;
+ }
+ if (from_name != NULL) {
+ RELEASE_PATH(from_name);
+ from_name = NULL;
+ }
+ if (holding_mntlock) {
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (tdvp) {
+ /*
+ * nameidone has to happen before we vnode_put(tdvp)
+ * since it may need to release the fs_nodelock on the tdvp
+ */
+ nameidone(tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ }
+ if (fdvp) {
+ /*
+ * nameidone has to happen before we vnode_put(fdvp)
+ * since it may need to release the fs_nodelock on the fdvp
+ */
+ nameidone(fromnd);
+
+ if (fvp)
+ vnode_put(fvp);
+ vnode_put(fdvp);
+ }
+
+ /*
+ * If things changed after we did the namei, then we will re-drive
+ * this rename call from the top.
+ */
+ if (do_retry) {
+ do_retry = 0;
+ goto retry;
+ }
+
+ FREE(__rename_data, M_TEMP);
+ return (error);
+}
+
+int
+rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
+{
+ return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
+ AT_FDCWD, uap->to, UIO_USERSPACE, 0));
+}
+
+int
+renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
+{
+ return renameat_internal(
+ vfs_context_current(),
+ uap->fromfd, uap->from,
+ uap->tofd, uap->to,
+ UIO_USERSPACE, uap->flags);
+}
+
+int
+renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
+{
+ return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
+ uap->tofd, uap->to, UIO_USERSPACE, 0));
+}
+
+/*
+ * Make a directory file.
+ *
+ * Returns: 0 Success
+ * EEXIST
+ * namei:???
+ * vnode_authorize:???
+ * vn_create:???
+ */
+/* ARGSUSED */
+static int
+mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
+ enum uio_seg segflg)
+{
+ vnode_t vp, dvp;
+ int error;
+ int update_flags = 0;
+ int batched;
+ struct nameidata nd;
+
+ AUDIT_ARG(mode, vap->va_mode);
+ NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
+ path, ctx);
+ nd.ni_cnd.cn_flags |= WILLBEDIR;
+ nd.ni_flag = NAMEI_COMPOUNDMKDIR;
+
+continue_lookup:
+ error = nameiat(&nd, fd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ batched = vnode_compound_mkdir_available(dvp);
+
+ VATTR_SET(vap, va_type, VDIR);
+
+ /*
+ * XXX
+ * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
+ * only get EEXIST or EISDIR for existing path components, and not that it could see
+ * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
+ * it will fail in a spurious manner. Need to figure out if this is valid behavior.
+ */
+ if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
+ if (error == EACCES || error == EPERM) {
+ int error2;
+
+ nameidone(&nd);
+ vnode_put(dvp);
+ dvp = NULLVP;
+
+ /*
+ * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
+ * rather than EACCES if the target exists.
+ */
+ NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
+ path, ctx);
+ error2 = nameiat(&nd, fd);
+ if (error2) {
+ goto out;
+ } else {
+ vp = nd.ni_vp;
+ error = EEXIST;
+ goto out;
+ }
+ }
+
+ goto out;
+ }
+
+ /*
+ * make the directory
+ */
+ if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
+ if (error == EKEEPLOOKING) {
+ nd.ni_vp = vp;
+ goto continue_lookup;
+ }
+
+ goto out;
+ }
+
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+ add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+#endif
+
+out:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ if (dvp)
+ vnode_put(dvp);
+
+ return (error);
+}
+
+/*
+ * mkdir_extended: Create a directory; with extended security (ACL).
+ *
+ * Parameters: p Process requesting to create the directory
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of directory to create
+ * uap->mode Access permissions to set
+ * uap->xsecurity ACL to set
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
+{
+ int ciferror;
+ kauth_filesec_t xsecdst;
+ struct vnode_attr va;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ xsecdst = NULL;
+ if ((uap->xsecurity != USER_ADDR_NULL) &&
+ ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
+ return ciferror;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+ if (xsecdst != NULL)
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+ ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
+ UIO_USERSPACE);
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+ return ciferror;
+}
+
+int
+mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+
+ return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
+ UIO_USERSPACE));
+}
+
+int
+mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+
+ return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
+ UIO_USERSPACE));
+}
+
+static int
+rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
+ enum uio_seg segflg)
+{
+ vnode_t vp, dvp;
+ int error;
+ struct nameidata nd;
+ char *path = NULL;
+ int len=0;
+ int has_listeners = 0;
+ int need_event = 0;
+ int truncated = 0;
+#if CONFIG_FSE
+ struct vnode_attr va;
+#endif /* CONFIG_FSE */
+ struct vnode_attr *vap = NULL;
+ int restart_count = 0;
+ int batched;
+
+ int restart_flag;
+
+ /*
+ * This loop exists to restart rmdir in the unlikely case that two
+ * processes are simultaneously trying to remove the same directory
+ * containing orphaned appleDouble files.
+ */
+ do {
+ NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
+ segflg, dirpath, ctx);
+ nd.ni_flag = NAMEI_COMPOUNDRMDIR;
+continue_lookup:
+ restart_flag = 0;
+ vap = NULL;
+
+ error = nameiat(&nd, fd);
+ if (error)
+ return (error);
+
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp) {
+ batched = vnode_compound_rmdir_available(vp);
+
+ if (vp->v_flag & VROOT) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ error = EBUSY;
+ goto out;
+ }
+
+#if DEVELOPMENT || DEBUG
+ /*
+ * XXX VSWAP: Check for entitlements or special flag here
+ * so we can restrict access appropriately.
+ */
+#else /* DEVELOPMENT || DEBUG */
+
+ if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
+ error = EPERM;
+ goto out;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+ /*
+ * Removed a check here; we used to abort if vp's vid
+ * was not the same as what we'd seen the last time around.
+ * I do not think that check was valid, because if we retry
+ * and all dirents are gone, the directory could legitimately
+ * be recycled but still be present in a situation where we would
+ * have had permission to delete. Therefore, we won't make
+ * an effort to preserve that check now that we may not have a
+ * vp here.
+ */
+
+ if (!batched) {
+ error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
+ if (error) {
+ if (error == ENOENT) {
+ assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ restart_flag = 1;
+ restart_count += 1;
+ }
+ }
+ goto out;
+ }
+ }
+ } else {
+ batched = 1;
+
+ if (!vnode_compound_rmdir_available(dvp)) {
+ panic("No error, but no compound rmdir?");
+ }
+ }
+
+#if CONFIG_FSE
+ fse_info finfo;
+
+ need_event = need_fsevent(FSE_DELETE, dvp);
+ if (need_event) {
+ if (!batched) {
+ get_fse_info(vp, &finfo, ctx);
+ } else {
+ error = vfs_get_notify_attributes(&va);
+ if (error) {
+ goto out;
+ }
+
+ vap = &va;
+ }
+ }
+#endif
+ has_listeners = kauth_authorize_fileop_has_listeners();
+ if (need_event || has_listeners) {
+ if (path == NULL) {
+ GET_PATH(path);
+ if (path == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ }
+
+ len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
+#if CONFIG_FSE
+ if (truncated) {
+ finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+#endif
+ }
+
+ error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+ nd.ni_vp = vp;
+ if (vp == NULLVP) {
+ /* Couldn't find a vnode */
+ goto out;
+ }
+
+ if (error == EKEEPLOOKING) {
+ goto continue_lookup;
+ } else if (batched && error == ENOENT) {
+ assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+ if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+ /*
+ * For compound VNOPs, the authorization callback
+ * may return ENOENT in case of racing hard link lookups;
+ * redrive the lookup.
+ */
+ restart_flag = 1;
+ restart_count += 1;
+ goto out;
+ }
+ }
+#if CONFIG_APPLEDOUBLE
+ /*
+ * Special case to remove orphaned AppleDouble
+ * files. I don't like putting this in the kernel,
+ * but carbon does not like putting this in carbon either,
+ * so here we are.
+ */
+ if (error == ENOTEMPTY) {
+ error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
+ if (error == EBUSY) {
+ goto out;
+ }
+
+
+ /*
+ * Assuming everything went well, we will try the RMDIR again
+ */
+ if (!error)
+ error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+ }
+#endif /* CONFIG_APPLEDOUBLE */
+ /*
+ * Call out to allow 3rd party notification of delete.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ if (!error) {
+ if (has_listeners) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_DELETE,
+ (uintptr_t)vp,
+ (uintptr_t)path);
+ }
+
+ if (vp->v_flag & VISHARDLINK) {
+ // see the comment in unlink1() about why we update
+ // the parent of a hard link when it is removed
+ vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+ }
+
+#if CONFIG_FSE
+ if (need_event) {
+ if (vap) {
+ vnode_get_fse_info_from_vap(vp, &finfo, vap);
+ }
+ add_fsevent(FSE_DELETE, ctx,
+ FSE_ARG_STRING, len, path,
+ FSE_ARG_FINFO, &finfo,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+
+out:
+ if (path != NULL) {
+ RELEASE_PATH(path);
+ path = NULL;
+ }
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+ vnode_put(dvp);
+
+ if (vp)
+ vnode_put(vp);
+
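+ /*
+ * If no restart is needed, wake any other thread waiting on this
+ * vnode and return; otherwise pause briefly before re-driving the
+ * removal (see the orphaned AppleDouble handling above).
+ */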
+ if (restart_flag == 0) {
+ wakeup_one((caddr_t)vp);
+ return (error);
+ }
+ tsleep(vp, PVFS, "rm AD", 1);
+
+ } while (restart_flag != 0);
+
+ return (error);
+
+}
+
+/*
+ * Remove a directory file.
+ */
+/* ARGSUSED */
+int
+rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
+{
+ return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
+ CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
+}
+
+/* Get direntry length padded to 8 byte alignment */
+#define DIRENT64_LEN(namlen) \
+ ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
+
+/* Get dirent length padded to 4 byte alignment */
+#define DIRENT_LEN(namelen) \
+ ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
+
+/* Get the end of this dirent */
+#define DIRENT_END(dep) \
+ (((char *)(dep)) + (dep)->d_reclen - 1)
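+/*
+ * For example, with the worst-case name length of 3 these evaluate to the
+ * 32-byte direntry and 12-byte dirent sizes cited in vnode_readdir64()
+ * below (assuming the struct layouts those figures imply).
+ */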
+
+errno_t
+vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
+ int *numdirent, vfs_context_t ctxp)
+{
+ /* Check if fs natively supports VNODE_READDIR_EXTENDED */
+ if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
+ ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
+ return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
+ } else {
+ size_t bufsize;
+ void * bufptr;
+ uio_t auio;
+ struct direntry *entry64;
+ struct dirent *dep;
+ int bytesread;
+ int error;
+
+ /*
+ * We're here because the underlying file system does not natively
+ * support extended directory entries (or the mount denies them), so
+ * we must fall back to dirents and convert them to direntries.
+ *
+ * Our kernel buffer needs to be smaller since re-packing will
+ * expand each dirent. The worst case (when the name length
+ * is 3 or less) corresponds to a struct direntry size of 32
+ * bytes (8-byte aligned) and a struct dirent size of 12 bytes
+ * (4-byte aligned). So having a buffer that is 3/8 the size
+ * will prevent us from reading more than we can pack.
+ *
+ * Since this buffer is wired memory, we will limit the
+ * buffer size to a maximum of 32K. We would really like to
+ * use 32K in the MIN(), but we use the magic number 87371 to
+ * prevent uio_resid() * 3 / 8 from overflowing. (With this cap,
+ * bufsize is at most 3 * 87371 / 8 == 32764 bytes.)
+ */
+ bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
+ MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
+ if (bufptr == NULL) {
+ return ENOMEM;
+ }
+
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, (uintptr_t)bufptr, bufsize);
+ auio->uio_offset = uio->uio_offset;
+
+ error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
+
+ dep = (struct dirent *)bufptr;
+ bytesread = bufsize - uio_resid(auio);
+
+ MALLOC(entry64, struct direntry *, sizeof(struct direntry),
+ M_TEMP, M_WAITOK);
+ /*
+ * Convert all the entries and copy them out to user's buffer.
+ */
+ while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
+ size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
+
+ if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
+ DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
+ printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
+ vp->v_mount->mnt_vfsstat.f_mntonname,
+ vp->v_name ? vp->v_name : "<unknown>");
+ error = EIO;
+ break;
+ }
+
+ bzero(entry64, enbufsize);
+ /* Convert a dirent to a dirent64. */
+ entry64->d_ino = dep->d_ino;
+ entry64->d_seekoff = 0;
+ entry64->d_reclen = enbufsize;
+ entry64->d_namlen = dep->d_namlen;
+ entry64->d_type = dep->d_type;
+ bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
+
+ /* Move to next entry. */
+ dep = (struct dirent *)((char *)dep + dep->d_reclen);
+
+ /* Copy entry64 to user's buffer. */
+ error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
+ }
+
+ /* Update the real offset using the offset we got from VNOP_READDIR. */
+ if (error == 0) {
+ uio->uio_offset = auio->uio_offset;
+ }
+ uio_free(auio);
+ FREE(bufptr, M_TEMP);
+ FREE(entry64, M_TEMP);
+ return (error);
+ }
+}
+
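+/* Cap the user buffer accepted by a single getdirentries request. */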
+#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+static int
+getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
+ off_t *offset, int flags)
+{
+ vnode_t vp;
+ struct vfs_context context = *vfs_context_current(); /* local copy */
+ struct fileproc *fp;
+ uio_t auio;
+ int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ off_t loff;
+ int error, eofflag, numdirent;
+ char uio_buf[ UIO_SIZEOF(1) ];
+
+ error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
+ if (error) {
+ return (error);
+ }
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+ if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
+ bufsize = GETDIRENTRIES_MAXBUFSIZE;
+
+#if CONFIG_MACF
+ error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
+ if (error)
+ goto out;
+#endif
+ if ( (error = vnode_getwithref(vp)) ) {
+ goto out;
+ }
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+unionread:
+ if (vp->v_type != VDIR) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_readdir(&context, vp);
+ if (error != 0) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif /* MAC */
+
+ loff = fp->f_fglob->fg_offset;
+ auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, bufp, bufsize);
+
+ if (flags & VNODE_READDIR_EXTENDED) {
+ error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
+ fp->f_fglob->fg_offset = uio_offset(auio);
+ } else {
+ error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
+ fp->f_fglob->fg_offset = uio_offset(auio);
+ }
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+
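+ /*
+ * Nothing was transferred: if this is a union mount, drop down to the
+ * covered directory (resetting the file offset) and read from there
+ * instead.
+ */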
+ if ((user_ssize_t)bufsize == uio_resid(auio)) {
+ if (union_dircheckp) {
+ error = union_dircheckp(&vp, fp, &context);
+ if (error == -1)
+ goto unionread;
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+ }
+
+ if ((vp->v_mount->mnt_flag & MNT_UNION)) {
+ struct vnode *tvp = vp;
+ if (lookup_traverse_union(tvp, &vp, &context) == 0) {
+ vnode_ref(vp);
+ fp->f_fglob->fg_data = (caddr_t) vp;
+ fp->f_fglob->fg_offset = 0;
+ vnode_rele(tvp);
+ vnode_put(tvp);
+ goto unionread;
+ }
+ vp = tvp;
+ }
+ }
+
+ vnode_put(vp);
+ if (offset) {
+ *offset = loff;
+ }
+
+ *bytesread = bufsize - uio_resid(auio);
+out:
+ file_drop(fd);
+ return (error);
+}
+
+
+int
+getdirentries(struct proc *p, struct getdirentries_args *uap, int32_t *retval)
+{
+ off_t offset;
+ ssize_t bytesread;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
+
+ if (error == 0) {
+ if (proc_is64bit(p)) {
+ user64_long_t base = (user64_long_t)offset;
+ error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
+ } else {
+ user32_long_t base = (user32_long_t)offset;
+ error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
+ }
+ *retval = bytesread;
+ }
+ return (error);
+}
+
+int
+getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
+{
+ off_t offset;
+ ssize_t bytesread;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
+
+ if (error == 0) {
+ *retval = bytesread;
+ error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
+ }
+ return (error);
+}
+
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ * XXX implement xsecurity
+ */
+#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
+static int
+umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
+{
+ struct filedesc *fdp;
+
+ AUDIT_ARG(mask, newmask);
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ *retval = fdp->fd_cmask;
+ fdp->fd_cmask = newmask & ALLPERMS;
+ proc_fdunlock(p);
+ return (0);
+}
+
+/*
+ * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
+ *
+ * Parameters: p Process requesting to set the umask
+ * uap User argument descriptor (see below)
+ * retval umask of the process (parameter p)
+ *
+ * Indirect: uap->newmask umask to set
+ * uap->xsecurity ACL to set
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
+{
+ int ciferror;
+ kauth_filesec_t xsecdst;
+
+ xsecdst = KAUTH_FILESEC_NONE;
+ if (uap->xsecurity != USER_ADDR_NULL) {
+ if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return ciferror;
+ }
+
+ ciferror = umask1(p, uap->newmask, xsecdst, retval);
+
+ if (xsecdst != KAUTH_FILESEC_NONE)
+ kauth_filesec_free(xsecdst);
+ return ciferror;
+}
+
+int
+umask(proc_t p, struct umask_args *uap, int32_t *retval)
+{
+ return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
+}
+
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ */
+/* ARGSUSED */
+int
+revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
+ error = ENOTSUP;
+ goto out;
+ }
+
+ if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
+ error = EBUSY;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_revoke(ctx, vp);
+ if (error)
+ goto out;
+#endif
+
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ if ((error = vnode_getattr(vp, &va, ctx)))
+ goto out;
+ if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
+ (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
+ goto out;
+ if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
+ VNOP_REVOKE(vp, REVOKEALL, ctx);
+out:
+ vnode_put(vp);
+ return (error);
+}
+
+
+/*
+ * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
+ * The following system calls are designed to support features
+ * which are specific to the HFS & HFS Plus volume formats
+ */
+
+
+/*
+ * Obtain attribute information on objects in a directory while enumerating
+ * the directory.
+ */
+/* ARGSUSED */
+int
+getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
+{
+ vnode_t vp;
+ struct fileproc *fp;
+ uio_t auio = NULL;
+ int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ uint32_t count = 0, savecount = 0;
+ uint32_t newstate = 0;
+ int error, eofflag;
+ uint32_t loff = 0;
+ struct attrlist attributelist;
+ vfs_context_t ctx = vfs_context_current();
+ int fd = uap->fd;
+ char uio_buf[ UIO_SIZEOF(1) ];
+ kauth_action_t action;
+
+ AUDIT_ARG(fd, fd);
+
+ /* Get the attributes into kernel space */
+ if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
+ return(error);
+ }
+ if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
+ return(error);
+ }
+ savecount = count;
+ if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
+ return (error);
+ }
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+
+#if CONFIG_MACF
+ error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+ fp->f_fglob);
+ if (error)
+ goto out;
+#endif
+
+
+ if ( (error = vnode_getwithref(vp)) )
+ goto out;
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+unionread:
+ if (vp->v_type != VDIR) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_readdir(ctx, vp);
+ if (error != 0) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif /* MAC */
+
+ /* set up the uio structure which will contain the users return buffer */
+ loff = fp->f_fglob->fg_offset;
+ auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, uap->buffer, uap->buffersize);
+
+ /*
+ * If the only item requested is file names, we can let that past with
+ * just LIST_DIRECTORY. If they want any other attributes, that means
+ * they need SEARCH as well.
+ */
+ action = KAUTH_VNODE_LIST_DIRECTORY;
+ if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
+ attributelist.fileattr || attributelist.dirattr)
+ action |= KAUTH_VNODE_SEARCH;
+
+ if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
+
+ /* Believe it or not, uap->options only has 32-bits of valid
+ * info, so truncate before extending again */
+
+ error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
+ (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
+ }
+
+ if (error) {
+ (void) vnode_put(vp);
+ goto out;
+ }
+
+ /*
+ * If we've got the last entry of a directory in a union mount
+ * then reset the eofflag and pretend there's still more to come.
+ * The next call will again set eofflag and the buffer will be empty,
+ * so traverse to the underlying directory and do the directory
+ * read there.
+ */
+ if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
+ if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
+ eofflag = 0;
+ } else { // Empty buffer
+ struct vnode *tvp = vp;
+ if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
+ vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
+ fp->f_fglob->fg_data = (caddr_t) vp;
+ fp->f_fglob->fg_offset = 0; // reset index for new dir
+ count = savecount;
+ vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
+ vnode_put(tvp);
+ goto unionread;
+ }
+ vp = tvp;
+ }
+ }
+
+ (void)vnode_put(vp);
+
+ if (error)
+ goto out;
+ fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
+
+ if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
+ goto out;
+ if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
+ goto out;
+ if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
+ goto out;
+
+ *retval = eofflag; /* similar to getdirentries */
+ error = 0;
+out:
+ file_drop(fd);
+ return (error); /* error returned earlier, a retval of 0 or 1 now */
+
+} /* end of getdirentriesattr system call */
+
+/*
+* Exchange data between two files
+*/
+
+/* ARGSUSED */
+int
+exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
+{
+
+ struct nameidata fnd, snd;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t fvp;
+ vnode_t svp;
+ int error;
+ u_int32_t nameiflags;
+ char *fpath = NULL;
+ char *spath = NULL;
+ int flen=0, slen=0;
+ int from_truncated=0, to_truncated=0;
+#if CONFIG_FSE
+ fse_info f_finfo, s_finfo;
+#endif
+
+ nameiflags = 0;
+ if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+
+ NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path1, ctx);
+
+ error = namei(&fnd);
+ if (error)
+ goto out2;
+
+ nameidone(&fnd);
+ fvp = fnd.ni_vp;
+
+ NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
+ UIO_USERSPACE, uap->path2, ctx);
+
+ error = namei(&snd);
+ if (error) {
+ vnode_put(fvp);
+ goto out2;
+ }
+ nameidone(&snd);
+ svp = snd.ni_vp;
+
+ /*
+ * if the files are the same, return an inval error
+ */
+ if (svp == fvp) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * if the files are on different volumes, return an error
+ */
+ if (svp->v_mount != fvp->v_mount) {
+ error = EXDEV;
+ goto out;
+ }
+
+ /* If they're not files, return an error */
+ if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_exchangedata(ctx,
+ fvp, svp);
+ if (error)
+ goto out;
+#endif
+ if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
+ ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
+ goto out;
+
+ if (
+#if CONFIG_FSE
+ need_fsevent(FSE_EXCHANGE, fvp) ||
+#endif
+ kauth_authorize_fileop_has_listeners()) {
+ GET_PATH(fpath);
+ GET_PATH(spath);
+ if (fpath == NULL || spath == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
+ slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
+
+#if CONFIG_FSE
+ get_fse_info(fvp, &f_finfo, ctx);
+ get_fse_info(svp, &s_finfo, ctx);
+ if (from_truncated || to_truncated) {
+ // set it here since only the f_finfo gets reported up to user space
+ f_finfo.mode |= FSE_TRUNCATED_PATH;