+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return (error);
+
+ error = vnode_getwithref(vp);
+ if (error) {
+ file_drop(uap->fd);
+ return (error);
+ }
+
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+
+ mp = vp->v_mount;
+ if (!mp) {
+ error = EBADF;
+ goto out;
+ }
+ sp = &mp->mnt_vfsstat;
+ if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
+ goto out;
+ }
+
+ error = statfs64_common(mp, sp, uap->buf);
+
+out:
+ file_drop(uap->fd);
+ vnode_put(vp);
+
+ return (error);
+}
+
+struct getfsstat_struct {
+ user_addr_t sfsp;
+ user_addr_t *mp;
+ int count;
+ int maxcount;
+ int flags;
+ int error;
+};
+
+
+static int
+getfsstat_callback(mount_t mp, void * arg)
+{
+
+ struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
+ struct vfsstatfs *sp;
+ int error, my_size;
+ vfs_context_t ctx = vfs_context_current();
+
+ if (fstp->sfsp && fstp->count < fstp->maxcount) {
+ sp = &mp->mnt_vfsstat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the
+ * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
+ */
+ if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+ (error = vfs_update_vfsstat(mp, ctx,
+ VFS_USER_EVENT))) {
+ KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
+ return(VFS_RETURNED);
+ }
+
+ /*
+ * Need to handle LP64 version of struct statfs
+ */
+ error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
+ if (error) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+ fstp->sfsp += my_size;
+
+ if (fstp->mp) {
+#if CONFIG_MACF
+ error = mac_mount_label_get(mp, *fstp->mp);
+ if (error) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+#endif
+ fstp->mp++;
+ }
+ }
+ fstp->count++;
+ return(VFS_RETURNED);
+}
+
+/*
+ * Get statistics on all filesystems.
+ */
+int
+getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
+{
+ struct __mac_getfsstat_args muap;
+
+ muap.buf = uap->buf;
+ muap.bufsize = uap->bufsize;
+ muap.mac = USER_ADDR_NULL;
+ muap.macsize = 0;
+ muap.flags = uap->flags;
+
+ return (__mac_getfsstat(p, &muap, retval));
+}
+
+/*
+ * __mac_getfsstat: Get MAC-related file system statistics
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval Count of file system statistics (N stats)
+ *
+ * Indirect: uap->bufsize Buffer size
+ * uap->macsize MAC info size
+ * uap->buf Buffer where information will be returned
+ * uap->mac MAC info
+ * uap->flags File system flags
+ *
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
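+/*
+ * Illustrative userland sketch (not part of the kernel source) of how
+ * this path is commonly driven through the getfsstat(2) wrapper, using
+ * the usual count-then-fill pattern:
+ *
+ *	int n = getfsstat(NULL, 0, MNT_NOWAIT);
+ *	struct statfs *buf = malloc(n * sizeof(*buf));
+ *	n = getfsstat(buf, n * sizeof(*buf), MNT_NOWAIT);
+ *
+ * Passing a NULL buffer makes the iteration below count every mount
+ * without copying anything out; MNT_NOWAIT returns the cached vfsstat
+ * for each mount instead of refreshing it from the filesystem.
+ */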
+int
+__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
+{
+ user_addr_t sfsp;
+ user_addr_t *mp;
+ size_t count, maxcount, bufsize, macsize;
+ struct getfsstat_struct fst;
+
+ bufsize = (size_t) uap->bufsize;
+ macsize = (size_t) uap->macsize;
+
+ if (IS_64BIT_PROCESS(p)) {
+ maxcount = bufsize / sizeof(struct user64_statfs);
+ }
+ else {
+ maxcount = bufsize / sizeof(struct user32_statfs);
+ }
+ sfsp = uap->buf;
+ count = 0;
+
+ mp = NULL;
+
+#if CONFIG_MACF
+ if (uap->mac != USER_ADDR_NULL) {
+ u_int32_t *mp0;
+ int error;
+ unsigned int i;
+
+ count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
+ if (count != maxcount)
+ return (EINVAL);
+
+ /* Copy in the array */
+ MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
+ if (mp0 == NULL) {
+ return (ENOMEM);
+ }
+
+ error = copyin(uap->mac, mp0, macsize);
+ if (error) {
+ FREE(mp0, M_MACTEMP);
+ return (error);
+ }
+
+ /* Normalize to an array of user_addr_t */
+ MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
+ if (mp == NULL) {
+ FREE(mp0, M_MACTEMP);
+ return (ENOMEM);
+ }
+
+ for (i = 0; i < count; i++) {
+ if (IS_64BIT_PROCESS(p))
+ mp[i] = ((user_addr_t *)mp0)[i];
+ else
+ mp[i] = (user_addr_t)mp0[i];
+ }
+ FREE(mp0, M_MACTEMP);
+ }
+#endif
+
+
+ fst.sfsp = sfsp;
+ fst.mp = mp;
+ fst.flags = uap->flags;
+ fst.count = 0;
+ fst.error = 0;
+ fst.maxcount = maxcount;
+
+
+ vfs_iterate(0, getfsstat_callback, &fst);
+
+ if (mp)
+ FREE(mp, M_MACTEMP);
+
+ if (fst.error ) {
+ KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
+ return(fst.error);
+ }
+
+ if (fst.sfsp && fst.count > fst.maxcount)
+ *retval = fst.maxcount;
+ else
+ *retval = fst.count;
+ return (0);
+}
+
+static int
+getfsstat64_callback(mount_t mp, void * arg)
+{
+ struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
+ struct vfsstatfs *sp;
+ int error;
+
+ if (fstp->sfsp && fstp->count < fstp->maxcount) {
+ sp = &mp->mnt_vfsstat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the fsstat
+ * cache. MNT_WAIT overrides MNT_NOWAIT.
+ *
+ * We treat MNT_DWAIT as MNT_WAIT for all instances of
+	 * getfsstat, since the constants come from the same
+ * namespace.
+ */
+ if (((fstp->flags & MNT_NOWAIT) == 0 ||
+ (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+ (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
+ KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
+ return(VFS_RETURNED);
+ }
+
+ error = statfs64_common(mp, sp, fstp->sfsp);
+ if (error) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+ fstp->sfsp += sizeof(struct statfs64);
+ }
+ fstp->count++;
+ return(VFS_RETURNED);
+}
+
+/*
+ * Get statistics on all file systems in 64 bit mode.
+ */
+int
+getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
+{
+ user_addr_t sfsp;
+ int count, maxcount;
+ struct getfsstat_struct fst;
+
+ maxcount = uap->bufsize / sizeof(struct statfs64);
+
+ sfsp = uap->buf;
+ count = 0;
+
+ fst.sfsp = sfsp;
+ fst.flags = uap->flags;
+ fst.count = 0;
+ fst.error = 0;
+ fst.maxcount = maxcount;
+
+ vfs_iterate(0, getfsstat64_callback, &fst);
+
+ if (fst.error ) {
+ KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
+ return(fst.error);
+ }
+
+ if (fst.sfsp && fst.count > fst.maxcount)
+ *retval = fst.maxcount;
+ else
+ *retval = fst.count;
+
+ return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ */
+/* ARGSUSED */
+static int
+common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
+{
+ struct filedesc *fdp = p->p_fd;
+ vnode_t vp;
+ vnode_t tdp;
+ vnode_t tvp;
+ struct mount *mp;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ AUDIT_ARG(fd, uap->fd);
+ if (per_thread && uap->fd == -1) {
+ /*
+		 * Switching back from per-thread to per-process CWD; verify
+		 * that we in fact have one before proceeding. The only success case
+ * for this code path is to return 0 preemptively after zapping
+ * the thread structure contents.
+ */
+ thread_t th = vfs_context_thread(ctx);
+ if (th) {
+ uthread_t uth = get_bsdthread_info(th);
+ tvp = uth->uu_cdir;
+ uth->uu_cdir = NULLVP;
+ if (tvp != NULLVP) {
+ vnode_rele(tvp);
+ return (0);
+ }
+ }
+ return (EBADF);
+ }
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return(error);
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_chdir(ctx, vp);
+ if (error)
+ goto out;
+#endif
+ error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
+ if (error)
+ goto out;
+
+ while (!error && (mp = vp->v_mountedhere) != NULL) {
+ if (vfs_busy(mp, LK_NOWAIT)) {
+ error = EACCES;
+ goto out;
+ }
+ error = VFS_ROOT(mp, &tdp, ctx);
+ vfs_unbusy(mp);
+ if (error)
+ break;
+ vnode_put(vp);
+ vp = tdp;
+ }
+ if (error)
+ goto out;
+ if ( (error = vnode_ref(vp)) )
+ goto out;
+ vnode_put(vp);
+
+ if (per_thread) {
+ thread_t th = vfs_context_thread(ctx);
+ if (th) {
+ uthread_t uth = get_bsdthread_info(th);
+ tvp = uth->uu_cdir;
+ uth->uu_cdir = vp;
+ OSBitOrAtomic(P_THCWD, &p->p_flag);
+ } else {
+ vnode_rele(vp);
+ return (ENOENT);
+ }
+ } else {
+ proc_fdlock(p);
+ tvp = fdp->fd_cdir;
+ fdp->fd_cdir = vp;
+ proc_fdunlock(p);
+ }
+
+ if (tvp)
+ vnode_rele(tvp);
+ file_drop(uap->fd);
+
+ return (0);
+out:
+ vnode_put(vp);
+ file_drop(uap->fd);
+
+ return(error);
+}
+
+int
+fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
+{
+ return common_fchdir(p, uap, 0);
+}
+
+int
+__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
+{
+ return common_fchdir(p, (void *)uap, 1);
+}
+
+/*
+ * Change current working directory (".").
+ *
+ * Returns: 0 Success
+ * change_dir:ENOTDIR
+ * change_dir:???
+ * vnode_ref:ENOENT No such file or directory
+ */
+/* ARGSUSED */
+static int
+common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
+{
+ struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+ vnode_t tvp;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = change_dir(&nd, ctx);
+ if (error)
+ return (error);
+ if ( (error = vnode_ref(nd.ni_vp)) ) {
+ vnode_put(nd.ni_vp);
+ return (error);
+ }
+ /*
+ * drop the iocount we picked up in change_dir
+ */
+ vnode_put(nd.ni_vp);
+
+ if (per_thread) {
+ thread_t th = vfs_context_thread(ctx);
+ if (th) {
+ uthread_t uth = get_bsdthread_info(th);
+ tvp = uth->uu_cdir;
+ uth->uu_cdir = nd.ni_vp;
+ OSBitOrAtomic(P_THCWD, &p->p_flag);
+ } else {
+ vnode_rele(nd.ni_vp);
+ return (ENOENT);
+ }
+ } else {
+ proc_fdlock(p);
+ tvp = fdp->fd_cdir;
+ fdp->fd_cdir = nd.ni_vp;
+ proc_fdunlock(p);
+ }
+
+ if (tvp)
+ vnode_rele(tvp);
+
+ return (0);
+}
+
+
+/*
+ * chdir
+ *
+ * Change current working directory (".") for the entire process
+ *
+ * Parameters: p Process requesting the call
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect parameters: uap->path Directory path
+ *
+ * Returns: 0 Success
+ * common_chdir: ENOTDIR
+ * common_chdir: ENOENT No such file or directory
+ * common_chdir: ???
+ *
+ */
+int
+chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
+{
+ return common_chdir(p, (void *)uap, 0);
+}
+
+/*
+ * __pthread_chdir
+ *
+ * Change current working directory (".") for a single thread
+ *
+ * Parameters: p Process requesting the call
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect parameters: uap->path Directory path
+ *
+ * Returns: 0 Success
+ * common_chdir: ENOTDIR
+ * common_chdir: ENOENT No such file or directory
+ * common_chdir: ???
+ *
+ */
+int
+__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
+{
+ return common_chdir(p, (void *)uap, 1);
+}
+
+
+/*
+ * Change notion of root (``/'') directory.
+ */
+/* ARGSUSED */
+int
+chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
+{
+ struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+ vnode_t tvp;
+ vfs_context_t ctx = vfs_context_current();
+
+ if ((error = suser(kauth_cred_get(), &p->p_acflag)))
+ return (error);
+
+ NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = change_dir(&nd, ctx);
+ if (error)
+ return (error);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_chroot(ctx, nd.ni_vp,
+ &nd.ni_cnd);
+ if (error) {
+ vnode_put(nd.ni_vp);
+ return (error);
+ }
+#endif
+
+ if ( (error = vnode_ref(nd.ni_vp)) ) {
+ vnode_put(nd.ni_vp);
+ return (error);
+ }
+ vnode_put(nd.ni_vp);
+
+ proc_fdlock(p);
+ tvp = fdp->fd_rdir;
+ fdp->fd_rdir = nd.ni_vp;
+ fdp->fd_flags |= FD_CHROOT;
+ proc_fdunlock(p);
+
+ if (tvp != NULL)
+ vnode_rele(tvp);
+
+ return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ *
+ * Returns: 0 Success
+ * ENOTDIR Not a directory
+ * namei:??? [anything namei can return]
+ * vnode_authorize:??? [anything vnode_authorize can return]
+ */
+static int
+change_dir(struct nameidata *ndp, vfs_context_t ctx)
+{
+ vnode_t vp;
+ int error;
+
+ if ((error = namei(ndp)))
+ return (error);
+ nameidone(ndp);
+ vp = ndp->ni_vp;
+
+ if (vp->v_type != VDIR) {
+ vnode_put(vp);
+ return (ENOTDIR);
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_chdir(ctx, vp);
+ if (error) {
+ vnode_put(vp);
+ return (error);
+ }
+#endif
+
+ error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
+ if (error) {
+ vnode_put(vp);
+ return (error);
+ }
+
+ return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ *
+ * Returns: 0 Success
+ * EINVAL
+ * EINTR
+ * falloc:ENFILE
+ * falloc:EMFILE
+ * falloc:ENOMEM
+ * vn_open_auth:???
+ * dupfdopen:???
+ * VNOP_ADVLOCK:???
+ * vnode_setsize:???
+ *
+ * XXX Need to implement uid, gid
+ */
+int
+open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
+ struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
+ int32_t *retval)
+{
+ proc_t p = vfs_context_proc(ctx);
+ uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
+ struct fileproc *fp;
+ vnode_t vp;
+ int flags, oflags;
+ int type, indx, error;
+ struct flock lf;
+ int no_controlling_tty = 0;
+ int deny_controlling_tty = 0;
+ struct session *sessp = SESSION_NULL;
+
+ oflags = uflags;
+
+ if ((oflags & O_ACCMODE) == O_ACCMODE)
+ return(EINVAL);
+ flags = FFLAGS(uflags);
+
+ AUDIT_ARG(fflags, oflags);
+ AUDIT_ARG(mode, vap->va_mode);
+
+ if ((error = falloc_withalloc(p,
+ &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
+ return (error);
+ }
+ uu->uu_dupfd = -indx - 1;
+
+ if (!(p->p_flag & P_CONTROLT)) {
+ sessp = proc_session(p);
+ no_controlling_tty = 1;
+ /*
+ * If conditions would warrant getting a controlling tty if
+ * the device being opened is a tty (see ttyopen in tty.c),
+ * but the open flags deny it, set a flag in the session to
+ * prevent it.
+ */
+ if (SESS_LEADER(p, sessp) &&
+ sessp->s_ttyvp == NULL &&
+ (flags & O_NOCTTY)) {
+ session_lock(sessp);
+ sessp->s_flags |= S_NOCTTY;
+ session_unlock(sessp);
+ deny_controlling_tty = 1;
+ }
+ }
+
+ if ((error = vn_open_auth(ndp, &flags, vap))) {
+ if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
+ if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
+ fp_drop(p, indx, NULL, 0);
+ *retval = indx;
+ if (deny_controlling_tty) {
+ session_lock(sessp);
+ sessp->s_flags &= ~S_NOCTTY;
+ session_unlock(sessp);
+ }
+ if (sessp != SESSION_NULL)
+ session_rele(sessp);
+ return (0);
+ }
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fp_free(p, indx, fp);
+
+ if (deny_controlling_tty) {
+ session_lock(sessp);
+ sessp->s_flags &= ~S_NOCTTY;
+ session_unlock(sessp);
+ }
+ if (sessp != SESSION_NULL)
+ session_rele(sessp);
+ return (error);
+ }
+ uu->uu_dupfd = 0;
+ vp = ndp->ni_vp;
+
+ fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
+ fp->f_fglob->fg_ops = &vnops;
+ fp->f_fglob->fg_data = (caddr_t)vp;
+
+#if CONFIG_PROTECT
+ if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
+ if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
+ fp->f_fglob->fg_flag |= FENCRYPTED;
+ }
+ }
+#endif
+
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT;
+#if CONFIG_MACF
+ error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
+ F_SETLK, &lf);
+ if (error)
+ goto bad;
+#endif
+ if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
+ goto bad;
+ fp->f_fglob->fg_flag |= FHASLOCK;
+ }
+
+ /* try to truncate by setting the size attribute */
+ if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
+ goto bad;
+
+ /*
+ * If the open flags denied the acquisition of a controlling tty,
+ * clear the flag in the session structure that prevented the lower
+ * level code from assigning one.
+ */
+ if (deny_controlling_tty) {
+ session_lock(sessp);
+ sessp->s_flags &= ~S_NOCTTY;
+ session_unlock(sessp);
+ }
+
+ /*
+ * If a controlling tty was set by the tty line discipline, then we
+ * want to set the vp of the tty into the session structure. We have
+ * a race here because we can't get to the vp for the tp in ttyopen,
+ * because it's not passed as a parameter in the open path.
+ */
+ if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
+ vnode_t ttyvp;
+
+ /*
+ * We already have a ref from vn_open_auth(), so we can demand another reference.
+ */
+ error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
+ if (error != 0) {
+ panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
+ }
+
+ session_lock(sessp);
+ ttyvp = sessp->s_ttyvp;
+ sessp->s_ttyvp = vp;
+ sessp->s_ttyvid = vnode_vid(vp);
+ session_unlock(sessp);
+ if (ttyvp != NULLVP)
+ vnode_rele(ttyvp);
+ }
+
+ vnode_put(vp);
+
+ proc_fdlock(p);
+ if (flags & O_CLOEXEC)
+ *fdflags(p, indx) |= UF_EXCLOSE;
+ if (flags & O_CLOFORK)
+ *fdflags(p, indx) |= UF_FORKCLOSE;
+ procfdtbl_releasefd(p, indx, NULL);
+ fp_drop(p, indx, fp, 1);
+ proc_fdunlock(p);
+
+ *retval = indx;
+
+ if (sessp != SESSION_NULL)
+ session_rele(sessp);
+ return (0);
+bad:
+ if (deny_controlling_tty) {
+ session_lock(sessp);
+ sessp->s_flags &= ~S_NOCTTY;
+ session_unlock(sessp);
+ }
+ if (sessp != SESSION_NULL)
+ session_rele(sessp);
+
+ struct vfs_context context = *vfs_context_current();
+ context.vc_ucred = fp->f_fglob->fg_cred;
+
+ vn_close(vp, fp->f_fglob->fg_flag, &context);
+ vnode_put(vp);
+ fp_free(p, indx, fp);
+
+ return (error);
+}
+
+/*
+ * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting the open
+ * uap User argument descriptor (see below)
+ * retval Pointer to an area to receive the
+ *				return value from the system call
+ *
+ * Indirect: uap->path Path to open (same as 'open')
+ *		uap->flags			Flags to open (same as 'open')
+ * uap->uid UID to set, if creating
+ * uap->gid GID to set, if creating
+ * uap->mode File mode, if creating (same as 'open')
+ * uap->xsecurity ACL to set, if creating
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
+ *
+ * XXX:	We should enumerate the possible errno values here, and where
+ * in the code they originated.
+ */
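+/*
+ * Worked example (illustrative only) of the creation-mode computation
+ * performed below: with a requested mode of 0666 and a process umask
+ * (fd_cmask) of 022,
+ *
+ *	cmode = ((0666 & ~022) & ALLPERMS) & ~S_ISTXT = 0644
+ *
+ * i.e. the umask is applied first, the result is clipped to ALLPERMS,
+ * and the sticky bit is stripped on create.
+ */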
+int
+open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
+{
+ struct filedesc *fdp = p->p_fd;
+ int ciferror;
+ kauth_filesec_t xsecdst;
+ struct vnode_attr va;
+ struct nameidata nd;
+ int cmode;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ xsecdst = NULL;
+ if ((uap->xsecurity != USER_ADDR_NULL) &&
+ ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
+ return ciferror;
+
+ VATTR_INIT(&va);
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+ VATTR_SET(&va, va_mode, cmode);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+ if (xsecdst != NULL)
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+ NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, vfs_context_current());
+
+ ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
+ fileproc_alloc_init, NULL, retval);
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+
+ return ciferror;
+}
+
+/*
+ * Open a file with data protection; the protection class is assigned
+ * atomically at create time.
+ *
+ * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
+ */
+int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
+ int flags = uap->flags;
+ int class = uap->class;
+ int dpflags = uap->dpflags;
+
+ /*
+	 * Follow the same path as a normal open(2):
+	 * look up the item if it exists, and acquire the vnode.
+ */
+ struct filedesc *fdp = p->p_fd;
+ struct vnode_attr va;
+ struct nameidata nd;
+ int cmode;
+ int error;
+
+ VATTR_INIT(&va);
+ /* Mask off all but regular access permissions */
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+ VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
+
+ NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, vfs_context_current());
+
+ /*
+	 * Initialize the extra fields in vnode_attr to pass down our
+	 * data-protection parameters:
+ * 1. target cprotect class.
+ * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
+ */
+ if (flags & O_CREAT) {
+ VATTR_SET(&va, va_dataprotect_class, class);
+ }
+
+ if (dpflags & O_DP_GETRAWENCRYPTED) {
+ if ( flags & (O_RDWR | O_WRONLY)) {
+ /* Not allowed to write raw encrypted bytes */
+ return EINVAL;
+ }
+ VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+ }
+
+ error = open1(vfs_context_current(), &nd, uap->flags, &va,
+ fileproc_alloc_init, NULL, retval);
+
+ return error;
+}
+
+
+int
+open(proc_t p, struct open_args *uap, int32_t *retval)
+{
+ __pthread_testcancel(1);
+ return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
+}
+
+int
+open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
+{
+ struct filedesc *fdp = p->p_fd;
+ struct vnode_attr va;
+ struct nameidata nd;
+ int cmode;
+
+ VATTR_INIT(&va);
+ /* Mask off all but regular access permissions */
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+ VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
+
+ NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, vfs_context_current());
+
+ return (open1(vfs_context_current(), &nd, uap->flags, &va,
+ fileproc_alloc_init, NULL, retval));
+}
+
+
+/*
+ * Create a special file.
+ */
+static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
+
+int
+mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+ vnode_t vp, dvp;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+ VATTR_SET(&va, va_rdev, uap->dev);
+
+ /* If it's a mknod() of a FIFO, call mkfifo1() instead */
+ if ((uap->mode & S_IFMT) == S_IFIFO)
+ return(mkfifo1(ctx, uap->path, &va));
+
+ AUDIT_ARG(mode, uap->mode);
+ AUDIT_ARG(value32, uap->dev);
+
+ if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
+ return (error);
+ NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ switch (uap->mode & S_IFMT) {
+ case S_IFMT: /* used by badsect to flag bad sectors */
+ VATTR_SET(&va, va_type, VBAD);
+ break;
+ case S_IFCHR:
+ VATTR_SET(&va, va_type, VCHR);
+ break;
+ case S_IFBLK:
+ VATTR_SET(&va, va_type, VBLK);
+ break;
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_create(ctx,
+ nd.ni_dvp, &nd.ni_cnd, &va);
+ if (error)
+ goto out;
+#endif
+
+ if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out;
+
+ if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
+ goto out;
+
+ if (vp) {
+ int update_flags = 0;
+
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+ add_fsevent(FSE_CREATE_FILE, ctx,
+ FSE_ARG_VNODE, vp,
+ FSE_ARG_DONE);
+#endif
+ }
+
+out:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ vnode_put(dvp);
+
+ return (error);
+}
+
+/*
+ * Create a named pipe.
+ *
+ * Returns: 0 Success
+ * EEXIST
+ * namei:???
+ * vnode_authorize:???
+ * vn_create:???
+ */
+static int
+mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
+{
+ vnode_t vp, dvp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
+ UIO_USERSPACE, upath, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ /* check that this is a new file and authorize addition */
+ if (vp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_SET(vap, va_type, VFIFO);
+
+ if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
+ goto out;
+
+ error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
+out:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ vnode_put(dvp);
+
+ return error;
+}
+
+
+/*
+ * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting the open
+ * uap User argument descriptor (see below)
+ * retval (Ignored)
+ *
+ * Indirect: uap->path Path to fifo (same as 'mkfifo')
+ * uap->uid UID to set
+ * uap->gid GID to set
+ * uap->mode File mode to set (same as 'mkfifo')
+ * uap->xsecurity ACL to set, if creating
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
+ *
+ * XXX:	We should enumerate the possible errno values here, and where
+ * in the code they originated.
+ */
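+/*
+ * Illustrative userland sketch (not part of the kernel source): the
+ * plain mkfifo(2) path below reaches mkfifo1() with only va_mode set,
+ * while mkfifo_extended() can additionally supply an owner and an ACL
+ * through the xsecurity argument.  The path used here is hypothetical:
+ *
+ *	if (mkfifo("/tmp/example.fifo", 0600) == -1)
+ *		err(1, "mkfifo");
+ */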
+int
+mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
+{
+ int ciferror;
+ kauth_filesec_t xsecdst;
+ struct vnode_attr va;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ xsecdst = KAUTH_FILESEC_NONE;
+ if (uap->xsecurity != USER_ADDR_NULL) {
+ if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return ciferror;
+ }
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+ if (xsecdst != KAUTH_FILESEC_NONE)
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+ ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
+
+ if (xsecdst != KAUTH_FILESEC_NONE)
+ kauth_filesec_free(xsecdst);
+ return ciferror;
+}
+
+/* ARGSUSED */
+int
+mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+
+ return(mkfifo1(vfs_context_current(), uap->path, &va));
+}
+
+
+static char *
+my_strrchr(char *p, int ch)
+{
+ char *save;
+
+ for (save = NULL;; ++p) {
+ if (*p == ch)
+ save = p;
+ if (!*p)
+ return(save);
+ }
+ /* NOTREACHED */
+}
+
+extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
+
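+/*
+ * Descriptive note (the paths below are hypothetical): for a parent
+ * vnode whose path is "/Volumes/data/dir" and a leafname of "file.txt",
+ * safe_getpath() builds "/Volumes/data/dir/file.txt" and returns its
+ * length including the terminating NUL.  If the combined string would
+ * exceed MAXPATHLEN, it is chopped back to the last complete directory
+ * component and *truncated_path is set; if the parent's path cannot be
+ * obtained at all, the path of the nearest resolvable ancestor, the
+ * mount point, or "/" is used instead.
+ */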
+int
+safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
+{
+ int ret, len = _len;
+
+ *truncated_path = 0;
+ ret = vn_getpath(dvp, path, &len);
+ if (ret == 0 && len < (MAXPATHLEN - 1)) {
+ if (leafname) {
+ path[len-1] = '/';
+ len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
+ if (len > MAXPATHLEN) {
+ char *ptr;
+
+ // the string got truncated!
+ *truncated_path = 1;
+ ptr = my_strrchr(path, '/');
+ if (ptr) {
+ *ptr = '\0'; // chop off the string at the last directory component
+ }
+ len = strlen(path) + 1;
+ }
+ }
+ } else if (ret == 0) {
+ *truncated_path = 1;
+ } else if (ret != 0) {
+ struct vnode *mydvp=dvp;
+
+ if (ret != ENOSPC) {
+ printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
+ dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
+ }
+ *truncated_path = 1;
+
+ do {
+ if (mydvp->v_parent != NULL) {
+ mydvp = mydvp->v_parent;
+ } else if (mydvp->v_mount) {
+ strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
+ break;
+ } else {
+ // no parent and no mount point? only thing is to punt and say "/" changed
+ strlcpy(path, "/", _len);
+ len = 2;
+ mydvp = NULL;
+ }
+
+ if (mydvp == NULL) {
+ break;
+ }
+
+ len = _len;
+ ret = vn_getpath(mydvp, path, &len);
+ } while (ret == ENOSPC);
+ }
+
+ return len;
+}
+
+
+/*
+ * Make a hard file link.
+ *
+ * Returns: 0 Success
+ * EPERM
+ * EEXIST
+ * EXDEV
+ * namei:???
+ * vnode_authorize:???
+ * VNOP_LINK:???
+ */
+/* ARGSUSED */
+int
+link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp, dvp, lvp;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+#if CONFIG_FSE
+ fse_info finfo;
+#endif
+ int need_event, has_listeners;
+ char *target_path = NULL;
+ int truncated=0;
+
+ vp = dvp = lvp = NULLVP;
+
+ /* look up the object we are linking to */
+ NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ /*
+ * Normally, linking to directories is not supported.
+ * However, some file systems may have limited support.
+ */
+ if (vp->v_type == VDIR) {
+ if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
+ error = EPERM; /* POSIX */
+ goto out;
+ }
+ /* Linking to a directory requires ownership. */
+ if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
+ struct vnode_attr dva;
+
+ VATTR_INIT(&dva);
+ VATTR_WANTED(&dva, va_uid);
+ if (vnode_getattr(vp, &dva, ctx) != 0 ||
+ !VATTR_IS_SUPPORTED(&dva, va_uid) ||
+ (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
+ error = EACCES;
+ goto out;
+ }
+ }
+ }
+
+ /* lookup the target node */
+#if CONFIG_TRIGGERS
+ nd.ni_op = OP_LINK;
+#endif
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
+ nd.ni_dirp = uap->link;
+ error = namei(&nd);
+ if (error != 0)
+ goto out;
+ dvp = nd.ni_dvp;
+ lvp = nd.ni_vp;
+
+#if CONFIG_MACF
+ if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
+ goto out2;
+#endif
+
+	/* or to anything that kauth doesn't want us to (e.g. immutable items) */
+ if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
+ goto out2;
+
+ /* target node must not exist */
+ if (lvp != NULLVP) {
+ error = EEXIST;
+ goto out2;
+ }
+ /* cannot link across mountpoints */
+ if (vnode_mount(vp) != vnode_mount(dvp)) {
+ error = EXDEV;
+ goto out2;
+ }
+
+	/* authorize creation of the target node */
+ if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out2;
+
+ /* and finally make the link */
+ error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
+ if (error)
+ goto out2;
+
+#if CONFIG_MACF
+ (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
+#endif
+
+#if CONFIG_FSE
+ need_event = need_fsevent(FSE_CREATE_FILE, dvp);
+#else
+ need_event = 0;
+#endif
+ has_listeners = kauth_authorize_fileop_has_listeners();
+
+ if (need_event || has_listeners) {
+ char *link_to_path = NULL;
+ int len, link_name_len;
+
+ /* build the path to the new link file */
+ GET_PATH(target_path);
+ if (target_path == NULL) {
+ error = ENOMEM;
+ goto out2;
+ }
+
+ len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
+
+ if (has_listeners) {
+ /* build the path to file we are linking to */
+ GET_PATH(link_to_path);
+ if (link_to_path == NULL) {
+ error = ENOMEM;
+ goto out2;
+ }
+
+ link_name_len = MAXPATHLEN;
+ vn_getpath(vp, link_to_path, &link_name_len);
+
+ /*
+			 * Call out to allow 3rd party notification of the link operation.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
+ (uintptr_t)link_to_path, (uintptr_t)target_path);
+ if (link_to_path != NULL) {
+ RELEASE_PATH(link_to_path);
+ }
+ }
+#if CONFIG_FSE
+ if (need_event) {
+ /* construct fsevent */
+ if (get_fse_info(vp, &finfo, ctx) == 0) {
+ if (truncated) {
+ finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+
+ // build the path to the destination of the link
+ add_fsevent(FSE_CREATE_FILE, ctx,
+ FSE_ARG_STRING, len, target_path,
+ FSE_ARG_FINFO, &finfo,
+ FSE_ARG_DONE);
+ }
+ if (vp->v_parent) {
+ add_fsevent(FSE_STAT_CHANGED, ctx,
+ FSE_ARG_VNODE, vp->v_parent,
+ FSE_ARG_DONE);
+ }
+ }
+#endif
+ }
+out2:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+ if (target_path != NULL) {
+ RELEASE_PATH(target_path);
+ }
+out:
+ if (lvp)
+ vnode_put(lvp);
+ if (dvp)
+ vnode_put(dvp);
+ vnode_put(vp);
+ return (error);
+}
+
+/*
+ * Make a symbolic link.
+ *
+ * We could add support for ACLs here too...
+ */
+/* ARGSUSED */
+int
+symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ char *path;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t vp, dvp;
+ size_t dummy=0;
+
+ MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
+ if (error)
+ goto out;
+ AUDIT_ARG(text, path); /* This is the link string */
+
+ NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
+ UIO_USERSPACE, uap->link, ctx);
+ error = namei(&nd);
+ if (error)
+ goto out;
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_type, VLNK);
+ VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
+#if CONFIG_MACF
+ error = mac_vnode_check_create(ctx,
+ dvp, &nd.ni_cnd, &va);
+#endif
+ if (error != 0) {
+ goto skipit;
+ }
+
+ if (vp != NULL) {
+ error = EEXIST;
+ goto skipit;
+ }
+
+ /* authorize */
+ if (error == 0)
+ error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
+ /* get default ownership, etc. */
+ if (error == 0)
+ error = vnode_authattr_new(dvp, &va, 0, ctx);
+ if (error == 0)
+ error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
+
+#if CONFIG_MACF
+ if (error == 0)
+ error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
+#endif
+
+ /* do fallback attribute handling */
+ if (error == 0)
+ error = vnode_setattr_fallback(vp, &va, ctx);
+
+ if (error == 0) {
+ int update_flags = 0;
+
+ if (vp == NULL) {
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+ nd.ni_op = OP_LOOKUP;
+#endif
+ nd.ni_cnd.cn_flags = 0;
+ error = namei(&nd);
+ vp = nd.ni_vp;
+
+ if (vp == NULL)
+ goto skipit;
+ }
+
+#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
+		/* call out to allow 3rd party notification of the symlink creation.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ if (kauth_authorize_fileop_has_listeners() &&
+ namei(&nd) == 0) {
+ char *new_link_path = NULL;
+ int len;
+
+ /* build the path to the new link file */
+ new_link_path = get_pathbuff();
+ len = MAXPATHLEN;
+ vn_getpath(dvp, new_link_path, &len);
+ if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
+ new_link_path[len - 1] = '/';
+ strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
+ }
+
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
+ (uintptr_t)path, (uintptr_t)new_link_path);
+ if (new_link_path != NULL)
+ release_pathbuff(new_link_path);
+ }
+#endif
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+ add_fsevent(FSE_CREATE_FILE, ctx,
+ FSE_ARG_VNODE, vp,
+ FSE_ARG_DONE);
+#endif
+ }
+
+skipit:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ vnode_put(dvp);
+out:
+ FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+
+ return (error);
+}
+
+/*
+ * Delete a whiteout from the filesystem.
+ * XXX authorization not implemented for whiteouts
+ */
+int
+undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
+{
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t vp, dvp;
+
+ NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
+ error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
+ } else
+ error = EEXIST;
+
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ vnode_put(dvp);
+
+ return (error);
+}
+
+
+/*
+ * Delete a name from the filesystem.
+ */
+/* ARGSUSED */
+int
+unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
+{
+ vnode_t vp, dvp;
+ int error;
+ struct componentname *cnp;
+ char *path = NULL;
+ int len=0;
+#if CONFIG_FSE
+ fse_info finfo;
+ struct vnode_attr va;
+#endif
+ int flags = 0;
+ int need_event = 0;
+ int has_listeners = 0;
+ int truncated_path=0;
+ int batched;
+ struct vnode_attr *vap = NULL;
+
+#if NAMEDRSRCFORK
+ /* unlink or delete is allowed on rsrc forks and named streams */
+ ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+
+ ndp->ni_cnd.cn_flags |= LOCKPARENT;
+ ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
+ cnp = &ndp->ni_cnd;
+
+lookup_continue:
+ error = namei(ndp);
+ if (error)
+ return (error);
+
+ dvp = ndp->ni_dvp;
+ vp = ndp->ni_vp;
+
+
+ /* With Carbon delete semantics, busy files cannot be deleted */
+ if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
+ flags |= VNODE_REMOVE_NODELETEBUSY;
+ }
+
+ /* Skip any potential upcalls if told to. */
+ if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
+ flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
+ }
+
+ if (vp) {
+ batched = vnode_compound_remove_available(vp);
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT) {
+ error = EBUSY;
+ }
+
+ if (!batched) {
+ error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
+ if (error) {
+ goto out;
+ }
+ }
+ } else {
+ batched = 1;
+
+ if (!vnode_compound_remove_available(dvp)) {
+ panic("No vp, but no compound remove?");
+ }
+ }
+
+#if CONFIG_FSE
+ need_event = need_fsevent(FSE_DELETE, dvp);
+ if (need_event) {
+ if (!batched) {
+ if ((vp->v_flag & VISHARDLINK) == 0) {
+ /* XXX need to get these data in batched VNOP */
+ get_fse_info(vp, &finfo, ctx);
+ }
+ } else {
+ error = vfs_get_notify_attributes(&va);
+ if (error) {
+ goto out;
+ }
+
+ vap = &va;
+ }
+ }
+#endif
+ has_listeners = kauth_authorize_fileop_has_listeners();
+ if (need_event || has_listeners) {
+ if (path == NULL) {
+ GET_PATH(path);
+ if (path == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ }
+ len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+ }
+
+#if NAMEDRSRCFORK
+ if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
+ error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
+ else
+#endif
+ {
+ error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
+ vp = ndp->ni_vp;
+ if (error == EKEEPLOOKING) {
+ if (!batched) {
+ panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
+ }
+
+ if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING, but continue flag not set?");
+ }
+
+ if (vnode_isdir(vp)) {
+ error = EISDIR;
+ goto out;
+ }
+ goto lookup_continue;
+ }
+ }
+
+ /*
+ * Call out to allow 3rd party notification of delete.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ if (!error) {
+ if (has_listeners) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_DELETE,
+ (uintptr_t)vp,
+ (uintptr_t)path);
+ }
+
+ if (vp->v_flag & VISHARDLINK) {
+ //
+ // if a hardlink gets deleted we want to blow away the
+ // v_parent link because the path that got us to this
+ // instance of the link is no longer valid. this will
+ // force the next call to get the path to ask the file
+ // system instead of just following the v_parent link.
+ //
+ vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+ }
+
+#if CONFIG_FSE
+ if (need_event) {
+ if (vp->v_flag & VISHARDLINK) {
+ get_fse_info(vp, &finfo, ctx);
+ } else if (vap) {
+ vnode_get_fse_info_from_vap(vp, &finfo, vap);
+ }
+ if (truncated_path) {
+ finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+ add_fsevent(FSE_DELETE, ctx,
+ FSE_ARG_STRING, len, path,
+ FSE_ARG_FINFO, &finfo,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+
+out:
+ if (path != NULL)
+ RELEASE_PATH(path);
+
+#if NAMEDRSRCFORK
+ /* recycle the deleted rsrc fork vnode to force a reclaim, which
+ * will cause its shadow file to go away if necessary.
+ */
+ if (vp && (vnode_isnamedstream(vp)) &&
+ (vp->v_parent != NULLVP) &&
+ vnode_isshadow(vp)) {
+ vnode_recycle(vp);
+ }
+#endif
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(ndp);
+ vnode_put(dvp);
+ if (vp) {
+ vnode_put(vp);
+ }
+ return (error);
+}
+
+/*
+ * Delete a name from the filesystem using POSIX semantics.
+ */
+int
+unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
+{
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, ctx);
+ return unlink1(ctx, &nd, 0);
+}
+
+/*
+ * Delete a name from the filesystem using Carbon semantics.
+ */
+int
+delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
+{
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, ctx);
+ return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
+}
+
+/*
+ * Reposition read/write file offset.
+ */
+int
+lseek(proc_t p, struct lseek_args *uap, off_t *retval)
+{
+ struct fileproc *fp;
+ vnode_t vp;
+ struct vfs_context *ctx;
+ off_t offset = uap->offset, file_size;
+ int error;
+
+ if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
+ if (error == ENOTSUP)
+ return (ESPIPE);
+ return (error);
+ }
+ if (vnode_isfifo(vp)) {
+ file_drop(uap->fd);
+ return(ESPIPE);
+ }
+
+
+ ctx = vfs_context_current();
+#if CONFIG_MACF
+ if (uap->whence == L_INCR && uap->offset == 0)
+ error = mac_file_check_get_offset(vfs_context_ucred(ctx),
+ fp->f_fglob);
+ else
+ error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+ fp->f_fglob);
+ if (error) {
+ file_drop(uap->fd);
+ return (error);
+ }
+#endif
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ switch (uap->whence) {
+ case L_INCR:
+ offset += fp->f_fglob->fg_offset;
+ break;
+ case L_XTND:
+ if ((error = vnode_size(vp, &file_size, ctx)) != 0)
+ break;
+ offset += file_size;
+ break;
+ case L_SET:
+ break;
+ default:
+ error = EINVAL;
+ }
+ if (error == 0) {
+ if (uap->offset > 0 && offset < 0) {
+ /* Incremented/relative move past max size */
+ error = EOVERFLOW;
+ } else {
+ /*
+ * Allow negative offsets on character devices, per
+ * POSIX 1003.1-2001. Most likely for writing disk
+ * labels.
+ */
+ if (offset < 0 && vp->v_type != VCHR) {
+ /* Decremented/relative move before start */
+ error = EINVAL;
+ } else {
+ /* Success */
+ fp->f_fglob->fg_offset = offset;
+ *retval = fp->f_fglob->fg_offset;
+ }
+ }
+ }
+
+ /*
+ * An lseek can affect whether data is "available to read." Use
+	 * a hint of NOTE_NONE so no EVFILT_VNODE events fire.
+ */
+ post_event_if_success(vp, error, NOTE_NONE);
+ (void)vnode_put(vp);
+ file_drop(uap->fd);
+ return (error);
+}
+
+
+/*
+ * Check access permissions.
+ *
+ * Returns: 0 Success
+ * vnode_authorize:???
+ */
+static int
+access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
+{
+ kauth_action_t action;
+ int error;
+
+ /*
+ * If just the regular access bits, convert them to something
+ * that vnode_authorize will understand.
+ */
+ if (!(uflags & _ACCESS_EXTENDED_MASK)) {
+ action = 0;
+ if (uflags & R_OK)
+ action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
+ if (uflags & W_OK) {
+ if (vnode_isdir(vp)) {
+ action |= KAUTH_VNODE_ADD_FILE |
+ KAUTH_VNODE_ADD_SUBDIRECTORY;
+ /* might want delete rights here too */
+ } else {
+ action |= KAUTH_VNODE_WRITE_DATA;
+ }
+ }
+ if (uflags & X_OK) {
+ if (vnode_isdir(vp)) {
+ action |= KAUTH_VNODE_SEARCH;
+ } else {
+ action |= KAUTH_VNODE_EXECUTE;
+ }
+ }
+ } else {
+ /* take advantage of definition of uflags */
+ action = uflags >> 8;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_access(ctx, vp, uflags);
+ if (error)
+ return (error);
+#endif /* MAC */
+
+ /* action == 0 means only check for existence */
+ if (action != 0) {
+ error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
+ } else {
+ error = 0;
+ }
+
+ return(error);
+}
+
+
+
+/*
+ * access_extended: Check access permissions in bulk.
+ *
+ * Description: uap->entries Pointer to an array of accessx
+ * descriptor structs, plus one or
+ * more NULL terminated strings (see
+ * "Notes" section below).
+ * uap->size Size of the area pointed to by
+ * uap->entries.
+ * uap->results Pointer to the results array.
+ *
+ * Returns: 0 Success
+ * ENOMEM Insufficient memory
+ * EINVAL Invalid arguments
+ * namei:EFAULT Bad address
+ * namei:ENAMETOOLONG Filename too long
+ * namei:ENOENT No such file or directory
+ * namei:ELOOP Too many levels of symbolic links
+ * namei:EBADF Bad file descriptor
+ * namei:ENOTDIR Not a directory
+ * namei:???
+ * access1:
+ *
+ * Implicit returns:
+ * uap->results Array contents modified
+ *
+ * Notes: The uap->entries are structured as an arbitrary length array
+ * of accessx descriptors, followed by one or more NULL terminated
+ * strings
+ *
+ * struct accessx_descriptor[0]
+ * ...
+ * struct accessx_descriptor[n]
+ * char name_data[0];
+ *
+ * We determine the entry count by walking the buffer containing
+ * the uap->entries argument descriptor. For each descriptor we
+ * see, the valid values for the offset ad_name_offset will be
+ * in the byte range:
+ *
+ * [ uap->entries + sizeof(struct accessx_descriptor) ]
+ * to
+ * [ uap->entries + uap->size - 2 ]
+ *
+ * since we must have at least one string, and the string must
+ * be at least one character plus the NULL terminator in length.
+ *
+ * XXX: Need to support the check-as uid argument
+ */
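+/*
+ * Worked example (illustrative only; the offsets show one possible
+ * layout) of a two-entry request checking R_OK and W_OK against the
+ * same path, as a userland wrapper such as accessx_np() might build it:
+ *
+ *	descriptor[0]:	ad_name_offset = 2 * sizeof(struct accessx_descriptor)
+ *			ad_flags       = R_OK
+ *	descriptor[1]:	ad_name_offset = 0	(reuse the previous name)
+ *			ad_flags       = W_OK
+ *	name_data      = "/some/path\0"
+ *
+ * uap->size is then 2 * sizeof(struct accessx_descriptor) plus the
+ * length of the string including its NUL, and uap->results must have
+ * room for two errno_t values.
+ */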
+int
+access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
+{
+ struct accessx_descriptor *input = NULL;
+ errno_t *result = NULL;
+ errno_t error = 0;
+ int wantdelete = 0;
+ unsigned int desc_max, desc_actual, i, j;
+ struct vfs_context context;
+ struct nameidata nd;
+ int niopts;
+ vnode_t vp = NULL;
+ vnode_t dvp = NULL;
+#define ACCESSX_MAX_DESCR_ON_STACK 10
+ struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
+
+ context.vc_ucred = NULL;
+
+ /*
+ * Validate parameters; if valid, copy the descriptor array and string
+ * arguments into local memory. Before proceeding, the following
+ * conditions must have been met:
+ *
+ * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
+ * o There must be sufficient room in the request for at least one
+	 *	  descriptor and a one byte NUL terminated string.
+ * o The allocation of local storage must not fail.
+ */
+ if (uap->size > ACCESSX_MAX_TABLESIZE)
+ return(ENOMEM);
+ if (uap->size < (sizeof(struct accessx_descriptor) + 2))
+ return(EINVAL);
+ if (uap->size <= sizeof (stack_input)) {
+ input = stack_input;
+ } else {
+ MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
+ if (input == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ }
+ error = copyin(uap->entries, input, uap->size);
+ if (error)
+ goto out;
+
+ AUDIT_ARG(opaque, input, uap->size);
+
+ /*
+	 * Force NUL termination of the copyin buffer to avoid namei() running
+ * off the end. If the caller passes us bogus data, they may get a
+ * bogus result.
+ */
+ ((char *)input)[uap->size - 1] = 0;
+
+ /*
+ * Access is defined as checking against the process' real identity,
+ * even if operations are checking the effective identity. This
+ * requires that we use a local vfs context.
+ */
+ context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
+ context.vc_thread = current_thread();
+
+ /*
+ * Find out how many entries we have, so we can allocate the result
+ * array by walking the list and adjusting the count downward by the
+ * earliest string offset we see.
+ */
+ desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
+ desc_actual = desc_max;
+ for (i = 0; i < desc_actual; i++) {
+ /*
+ * Take the offset to the name string for this entry and
+ * convert to an input array index, which would be one off
+ * the end of the array if this entry was the lowest-addressed
+ * name string.
+ */
+ j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
+
+ /*
+ * An offset greater than the max allowable offset is an error.
+ * It is also an error for any valid entry to point
+ * to a location prior to the end of the current entry, if
+ * it's not a reference to the string of the previous entry.
+ */
+ if (j > desc_max || (j != 0 && j <= i)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * An offset of 0 means use the previous descriptor's offset;
+ * this is used to chain multiple requests for the same file
+ * to avoid multiple lookups.
+ */
+ if (j == 0) {
+ /* This is not valid for the first entry */
+ if (i == 0) {
+ error = EINVAL;
+ goto out;
+ }
+ continue;
+ }
+
+ /*
+ * If the offset of the string for this descriptor is before
+ * what we believe is the current actual last descriptor,
+ * then we need to adjust our estimate downward; this permits
+ * the string table following the last descriptor to be out
+ * of order relative to the descriptor list.
+ */
+ if (j < desc_actual)
+ desc_actual = j;
+ }
+
+ /*
+ * We limit the actual number of descriptors we are willing to process
+	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number
+	 * being requested exceeds this limit, we fail with ENOMEM rather
+	 * than attempt to allocate an oversized result array.
+ */
+ if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
+ error = ENOMEM;
+ goto out;
+ }
+ MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
+ if (result == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Do the work by iterating over the descriptor entries we know to
+ * at least appear to contain valid data.
+ */
+ error = 0;
+ for (i = 0; i < desc_actual; i++) {
+ /*
+ * If the ad_name_offset is 0, then we use the previous
+ * results to make the check; otherwise, we are looking up
+ * a new file name.
+ */
+ if (input[i].ad_name_offset != 0) {
+ /* discard old vnodes */
+ if (vp) {
+ vnode_put(vp);
+ vp = NULL;
+ }
+ if (dvp) {
+ vnode_put(dvp);
+ dvp = NULL;
+ }
+
+ /*
+ * Scan forward in the descriptor list to see if we
+ * need the parent vnode. We will need it if we are
+ * deleting, since we must have rights to remove
+ * entries in the parent directory, as well as the
+ * rights to delete the object itself.
+ */
+ wantdelete = input[i].ad_flags & _DELETE_OK;
+ for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
+ if (input[j].ad_flags & _DELETE_OK)
+ wantdelete = 1;
+
+ niopts = FOLLOW | AUDITVNPATH1;
+
+ /* need parent for vnode_authorize for deletion test */
+ if (wantdelete)
+ niopts |= WANTPARENT;
+
+ /* do the lookup */
+ NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
+ CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
+ &context);
+ error = namei(&nd);
+ if (!error) {
+ vp = nd.ni_vp;
+ if (wantdelete)
+ dvp = nd.ni_dvp;
+ }
+ nameidone(&nd);
+ }
+
+ /*
+ * Handle lookup errors.
+ */
+ switch(error) {
+ case ENOENT:
+ case EACCES:
+ case EPERM:
+ case ENOTDIR:
+ result[i] = error;
+ break;
+ case 0:
+ /* run this access check */
+ result[i] = access1(vp, dvp, input[i].ad_flags, &context);
+ break;
+ default:
+ /* fatal lookup error */
+
+ goto out;
+ }
+ }
+
+ AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
+
+ /* copy out results */
+ error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
+
+out:
+ if (input && input != stack_input)
+ FREE(input, M_TEMP);
+ if (result)
+ FREE(result, M_TEMP);
+ if (vp)
+ vnode_put(vp);
+ if (dvp)
+ vnode_put(dvp);
+ if (IS_VALID_CRED(context.vc_ucred))
+ kauth_cred_unref(&context.vc_ucred);
+ return(error);
+}
+
+
+/*
+ * Returns: 0 Success
+ * namei:EFAULT Bad address
+ * namei:ENAMETOOLONG Filename too long
+ * namei:ENOENT No such file or directory
+ * namei:ELOOP Too many levels of symbolic links
+ * namei:EBADF Bad file descriptor
+ * namei:ENOTDIR Not a directory
+ * namei:???
+ * access1:
+ */
+int
+access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
+{
+ int error;
+ struct nameidata nd;
+ int niopts;
+ struct vfs_context context;
+#if NAMEDRSRCFORK
+ int is_namedstream = 0;
+#endif
+
+ /*
+ * Access is defined as checking against the process'
+ * real identity, even if operations are checking the
+ * effective identity. So we need to tweak the credential
+ * in the context.
+ */
+ context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
+ context.vc_thread = current_thread();
+
+ niopts = FOLLOW | AUDITVNPATH1;
+ /* need parent for vnode_authorize for deletion test */
+ if (uap->flags & _DELETE_OK)
+ niopts |= WANTPARENT;
+ NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
+ uap->path, &context);
+
+#if NAMEDRSRCFORK
+ /* access(F_OK) calls are allowed for resource forks. */
+ if (uap->flags == F_OK)
+ nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+ error = namei(&nd);
+ if (error)
+ goto out;
+
+#if NAMEDRSRCFORK
+ /* Grab reference on the shadow stream file vnode to
+ * force an inactive on release which will mark it
+ * for recycle.
+ */
+ if (vnode_isnamedstream(nd.ni_vp) &&
+ (nd.ni_vp->v_parent != NULLVP) &&
+ vnode_isshadow(nd.ni_vp)) {
+ is_namedstream = 1;
+ vnode_ref(nd.ni_vp);
+ }
+#endif
+
+ error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
+
+#if NAMEDRSRCFORK
+ if (is_namedstream) {
+ vnode_rele(nd.ni_vp);
+ }
+#endif
+
+ vnode_put(nd.ni_vp);
+ if (uap->flags & _DELETE_OK)
+ vnode_put(nd.ni_dvp);
+ nameidone(&nd);
+
+out:
+ kauth_cred_unref(&context.vc_ucred);
+ return(error);
+}
+
+
+/*
+ * Returns: 0 Success
+ * EFAULT
+ * copyout:EFAULT
+ * namei:???
+ * vn_stat:???
+ */
+static int
+stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
+{
+ union {
+ struct stat sb;
+ struct stat64 sb64;
+ } source;
+ union {
+ struct user64_stat user64_sb;
+ struct user32_stat user32_sb;
+ struct user64_stat64 user64_sb64;
+ struct user32_stat64 user32_sb64;
+ } dest;
+ caddr_t sbp;
+ int error, my_size;
+ kauth_filesec_t fsec;
+ size_t xsecurity_bufsize;
+ void * statptr;
+
+#if NAMEDRSRCFORK
+ int is_namedstream = 0;
+ /* stat calls are allowed for resource forks. */
+ ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+ error = namei(ndp);
+ if (error)
+ return (error);
+ fsec = KAUTH_FILESEC_NONE;
+
+ statptr = (void *)&source;
+
+#if NAMEDRSRCFORK
+ /* Grab reference on the shadow stream file vnode to
+ * force an inactive on release which will mark it
+ * for recycle.
+ */
+ if (vnode_isnamedstream(ndp->ni_vp) &&
+ (ndp->ni_vp->v_parent != NULLVP) &&
+ vnode_isshadow(ndp->ni_vp)) {
+ is_namedstream = 1;
+ vnode_ref(ndp->ni_vp);
+ }
+#endif
+
+ error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
+
+#if NAMEDRSRCFORK
+ if (is_namedstream) {
+ vnode_rele(ndp->ni_vp);
+ }
+#endif
+ vnode_put(ndp->ni_vp);
+ nameidone(ndp);
+
+ if (error)
+ return (error);
+ /* Zap spare fields */
+ if (isstat64 != 0) {
+ source.sb64.st_lspare = 0;
+ source.sb64.st_qspare[0] = 0LL;
+ source.sb64.st_qspare[1] = 0LL;
+		/*
+		 * Check if we raced (post lookup) against the last unlink of a
+		 * file; adjust the link count before converting to the user
+		 * format so the fix is reflected in the copied-out data.
+		 */
+		if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
+			source.sb64.st_nlink = 1;
+		}
+		if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
+			munge_user64_stat64(&source.sb64, &dest.user64_sb64);
+			my_size = sizeof(dest.user64_sb64);
+			sbp = (caddr_t)&dest.user64_sb64;
+		} else {
+			munge_user32_stat64(&source.sb64, &dest.user32_sb64);
+			my_size = sizeof(dest.user32_sb64);
+			sbp = (caddr_t)&dest.user32_sb64;
+		}
+ } else {
+ source.sb.st_lspare = 0;
+ source.sb.st_qspare[0] = 0LL;
+ source.sb.st_qspare[1] = 0LL;
+		/*
+		 * Check if we raced (post lookup) against the last unlink of a
+		 * file; adjust the link count before converting to the user
+		 * format so the fix is reflected in the copied-out data.
+		 */
+		if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
+			source.sb.st_nlink = 1;
+		}
+		if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
+			munge_user64_stat(&source.sb, &dest.user64_sb);
+			my_size = sizeof(dest.user64_sb);
+			sbp = (caddr_t)&dest.user64_sb;
+		} else {
+			munge_user32_stat(&source.sb, &dest.user32_sb);
+			my_size = sizeof(dest.user32_sb);
+			sbp = (caddr_t)&dest.user32_sb;
+		}
+ }
+ if ((error = copyout(sbp, ub, my_size)) != 0)
+ goto out;
+
+ /* caller wants extended security information? */
+ if (xsecurity != USER_ADDR_NULL) {
+
+ /* did we get any? */
+ if (fsec == KAUTH_FILESEC_NONE) {
+ if (susize(xsecurity_size, 0) != 0) {
+ error = EFAULT;
+ goto out;
+ }
+ } else {
+ /* find the user buffer size */
+ xsecurity_bufsize = fusize(xsecurity_size);
+
+ /* copy out the actual data size */
+ if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
+ error = EFAULT;
+ goto out;
+ }
+
+ /* if the caller supplied enough room, copy out to it */
+ if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
+ error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
+ }
+ }
+out:
+ if (fsec != KAUTH_FILESEC_NONE)
+ kauth_filesec_free(fsec);
+ return (error);
+}
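+
+/*
+ * Illustrative sketch (not part of this change) of the size-negotiation
+ * contract implemented by the xsecurity copyout above: the kernel always
+ * writes back the size it needed, and copies the filesec out only when the
+ * caller's buffer is large enough.  A userspace caller of one of the
+ * *_extended stat variants might therefore retry on a short buffer:
+ *
+ *	char aclbuf[512];			// guessed capacity
+ *	size_t aclsize = sizeof(aclbuf);	// in: capacity, out: bytes required
+ *	if (stat_extended(path, &sb, aclbuf, &aclsize) == 0 &&
+ *	    aclsize > sizeof(aclbuf)) {
+ *		// allocate aclsize bytes and call again
+ *	}
+ *
+ * The wrapper name and exact signature are illustrative only.
+ */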
+
+/*
+ * Get file status; this version follows links.
+ *
+ * Returns: 0 Success
+ * stat2:??? [see stat2() in this file]
+ */
+static int
+stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
+{
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, path, ctx);
+ return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
+}
+
+/*
+ * stat_extended: Get file status; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
+{
+ return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
+}
+
+/*
+ * Returns: 0 Success
+ * stat1:??? [see stat1() in this file]
+ */
+int
+stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
+{
+ return(stat1(uap->path, uap->ub, 0, 0, 0));
+}
+
+int
+stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
+{
+ return(stat1(uap->path, uap->ub, 0, 0, 1));
+}
+
+/*
+ * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
+{
+ return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
+}
+/*
+ * Get file status; this version does not follow links.
+ */
+static int
+lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
+{
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, path, ctx);
+
+ return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
+}
+
+/*
+ * lstat_extended: Get file status; does not follow links; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
+{
+ return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
+}
+
+int
+lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
+{
+ return(lstat1(uap->path, uap->ub, 0, 0, 0));
+}
+
+int
+lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
+{
+ return(lstat1(uap->path, uap->ub, 0, 0, 1));
+}
+
+/*
+ * lstat64_extended: Get file status; can handle large inode numbers; does not
+ * follow links; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
+{
+ return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
+}
+
+/*
+ * Get configurable pathname variables.
+ *
+ * Returns: 0 Success
+ * namei:???
+ * vn_pathconf:???
+ *
+ * Notes: Global implementation constants are intended to be
+ * implemented in this function directly; all other constants
+ * are per-FS implementation, and therefore must be handled in
+ * each respective FS, instead.
+ *
+ * XXX We implement some things globally right now that should actually be
+ * XXX per-FS; we will need to deal with this at some point.
+ */
+/* ARGSUSED */
+int
+pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
+{
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+
+ error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
+
+ vnode_put(nd.ni_vp);
+ nameidone(&nd);
+ return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ */
+/* ARGSUSED */
+int
+readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
+{
+ vnode_t vp;
+ uio_t auio;
+ int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+ char uio_buf[ UIO_SIZEOF(1) ];
+
+ NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
+ &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, uap->buf, uap->count);
+ if (vp->v_type != VLNK)
+ error = EINVAL;
+ else {
+#if CONFIG_MACF
+ error = mac_vnode_check_readlink(ctx,
+ vp);
+#endif
+ if (error == 0)
+ error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
+ if (error == 0)
+ error = VNOP_READLINK(vp, auio, ctx);
+ }
+ vnode_put(vp);
+
+ /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
+ *retval = uap->count - (int)uio_resid(auio);
+ return (error);
+}
+
+/*
+ * Change file flags.
+ */
+static int
+chflags1(vnode_t vp, int flags, vfs_context_t ctx)
+{
+ struct vnode_attr va;
+ kauth_action_t action;
+ int error;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_flags, flags);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_setflags(ctx, vp, flags);
+ if (error)
+ goto out;
+#endif
+
+ /* request authorisation, disregard immutability */
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ /*
+ * Request that the auth layer disregard those file flags it's allowed to when
+ * authorizing this operation; we need to do this in order to be able to
+ * clear immutable flags.
+ */
+ if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
+ goto out;
+ error = vnode_setattr(vp, &va, ctx);
+
+ if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
+ error = ENOTSUP;
+ }
+out:
+ vnode_put(vp);
+ return(error);
+}
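+
+/*
+ * Illustrative note (not part of this change): the KAUTH_VNODE_NOIMMUTABLE
+ * override requested above is what allows a sufficiently privileged caller
+ * to clear an immutable flag at all; without it the authorization check
+ * would reject the setattr because the file is currently immutable.  From
+ * userspace this is just the ordinary call, e.g.:
+ *
+ *	chflags("/path/to/file", 0);	// clear all flags, UF_IMMUTABLE included,
+ *					// if the caller is permitted to do so
+ */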
+
+/*
+ * Change flags of a file given a path name.
+ */
+/* ARGSUSED */
+int
+chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+
+ AUDIT_ARG(fflags, uap->flags);
+ NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+ nameidone(&nd);
+
+ error = chflags1(vp, uap->flags, ctx);
+
+ return(error);
+}
+
+/*
+ * Change flags of a file given a file descriptor.
+ */
+/* ARGSUSED */
+int
+fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ AUDIT_ARG(fflags, uap->flags);
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return (error);
+
+ if ((error = vnode_getwithref(vp))) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ error = chflags1(vp, uap->flags, vfs_context_current());
+
+ file_drop(uap->fd);
+ return (error);
+}
+
+/*
+ * Change security information on a filesystem object.
+ *
+ * Returns: 0 Success
+ * EPERM Operation not permitted
+ * vnode_authattr:??? [anything vnode_authattr can return]
+ * vnode_authorize:??? [anything vnode_authorize can return]
+ * vnode_setattr:??? [anything vnode_setattr can return]
+ *
+ * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
+ * translated to EPERM before being returned.
+ */
+static int
+chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
+{
+ kauth_action_t action;
+ int error;
+
+ AUDIT_ARG(mode, vap->va_mode);
+ /* XXX audit new args */
+
+#if NAMEDSTREAMS
+ /* chmod calls are not allowed for resource forks. */
+ if (vp->v_flag & VISNAMEDSTREAM) {
+ return (EPERM);
+ }
+#endif
+
+#if CONFIG_MACF
+ if (VATTR_IS_ACTIVE(vap, va_mode) &&
+ (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
+ return (error);
+#endif
+
+ /* make sure that the caller is allowed to set this security information */
+ if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
+ ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
+ if (error == EACCES)
+ error = EPERM;
+ return(error);
+ }
+
+ error = vnode_setattr(vp, vap, ctx);
+
+ return (error);
+}
+
+
+/*
+ * Change mode of a file given a path name.
+ *
+ * Returns: 0 Success
+ * namei:??? [anything namei can return]
+ * chmod2:??? [anything chmod2 can return]
+ */
+static int
+chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
+{
+ struct nameidata nd;
+ int error;
+
+ NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, path, ctx);
+ if ((error = namei(&nd)))
+ return (error);
+ error = chmod2(ctx, nd.ni_vp, vap);
+ vnode_put(nd.ni_vp);
+ nameidone(&nd);
+ return(error);
+}
+
+/*
+ * chmod_extended: Change the mode of a file given a path name; with extended
+ * argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting the open
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path to object (same as 'chmod')
+ * uap->uid UID to set
+ * uap->gid GID to set
+ * uap->mode File mode to set (same as 'chmod')
+ * uap->xsecurity ACL to set (or delete)
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
+ *
+ * XXX: We should enumerate the possible errno values here, and where
+ * in the code they originated.
+ */
+int
+chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
+{
+ int error;
+ struct vnode_attr va;
+ kauth_filesec_t xsecdst;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ VATTR_INIT(&va);
+ if (uap->mode != -1)
+ VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+
+ xsecdst = NULL;
+ switch(uap->xsecurity) {
+ /* explicit remove request */
+ case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
+ VATTR_SET(&va, va_acl, NULL);
+ break;
+ /* not being set */
+ case USER_ADDR_NULL:
+ break;
+ default:
+ if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return(error);
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+ KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
+ }
+
+ error = chmod1(vfs_context_current(), uap->path, &va);
+
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+ return(error);
+}
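+
+/*
+ * Illustrative sketch (not part of this change) of the uap->xsecurity
+ * encodings handled by the switch above, as a caller might supply them
+ * (wrapper name and argument order are illustrative only):
+ *
+ *	chmod_extended(path, uid, gid, mode, (user_addr_t)NULL);   // leave any ACL untouched
+ *	chmod_extended(path, uid, gid, mode, _FILESEC_REMOVE_ACL); // (void *)1: delete the ACL
+ *	chmod_extended(path, uid, gid, mode, filesec_buf);         // copyin and set this ACL
+ */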
+
+/*
+ * Returns: 0 Success
+ * chmod1:??? [anything chmod1 can return]
+ */
+int
+chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+
+ return(chmod1(vfs_context_current(), uap->path, &va));
+}
+
+/*
+ * Change mode of a file given a file descriptor.
+ */
+static int
+fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
+{
+ vnode_t vp;
+ int error;
+
+ AUDIT_ARG(fd, fd);
+
+ if ((error = file_vnode(fd, &vp)) != 0)
+ return (error);
+ if ((error = vnode_getwithref(vp)) != 0) {
+ file_drop(fd);
+ return(error);
+ }
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ error = chmod2(vfs_context_current(), vp, vap);
+ (void)vnode_put(vp);
+ file_drop(fd);
+
+ return (error);
+}
+
+/*
+ * fchmod_extended: Change mode of a file given a file descriptor; with
+ * extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting to change file mode
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->mode File mode to set (same as 'chmod')
+ * uap->uid UID to set
+ * uap->gid GID to set
+ * uap->xsecurity ACL to set (or delete)
+ * uap->fd File descriptor of file to change mode
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
+{
+ int error;
+ struct vnode_attr va;
+ kauth_filesec_t xsecdst;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ VATTR_INIT(&va);
+ if (uap->mode != -1)
+ VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+
+ xsecdst = NULL;
+ switch(uap->xsecurity) {
+ case USER_ADDR_NULL:
+ VATTR_SET(&va, va_acl, NULL);
+ break;
+ case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
+ VATTR_SET(&va, va_acl, NULL);
+ break;
+ /* not being set */
+ case CAST_USER_ADDR_T(-1):
+ break;
+ default:
+ if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return(error);
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+ }
+
+ error = fchmod1(p, uap->fd, &va);
+
+
+ switch(uap->xsecurity) {
+ case USER_ADDR_NULL:
+ case CAST_USER_ADDR_T(-1):
+ break;
+ default:
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+ }
+ return(error);
+}
+
+int
+fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+
+ return(fchmod1(p, uap->fd, &va));
+}
+
+
+/*
+ * Set ownership given a path name.
+ */
+/* ARGSUSED */
+static int
+chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ int error;
+ struct nameidata nd;
+ kauth_action_t action;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ NDINIT(&nd, LOOKUP, OP_SETATTR,
+ (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ VATTR_INIT(&va);
+ if (uap->uid != VNOVAL)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != VNOVAL)
+ VATTR_SET(&va, va_gid, uap->gid);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
+ if (error)
+ goto out;
+#endif
+
+ /* preflight and authorize attribute changes */
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
+ goto out;
+ error = vnode_setattr(vp, &va, ctx);
+
+out:
+ /*
+ * EACCES is only allowed from namei(); permissions failure should
+ * return EPERM, so we need to translate the error code.
+ */
+ if (error == EACCES)
+ error = EPERM;
+
+ vnode_put(vp);
+ return (error);
+}
+
+int
+chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
+{
+ return chown1(vfs_context_current(), uap, retval, 1);
+}
+
+int
+lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
+{
+ /* Argument list identical, but machine generated; cast for chown1() */
+ return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
+}
+
+/*
+ * Set ownership given a file descriptor.
+ */
+/* ARGSUSED */
+int
+fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t vp;
+ int error;
+ kauth_action_t action;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return (error);
+
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ VATTR_INIT(&va);
+ if (uap->uid != VNOVAL)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != VNOVAL)
+ VATTR_SET(&va, va_gid, uap->gid);
+
+#if NAMEDSTREAMS
+ /* chown calls are not allowed for resource forks. */
+ if (vp->v_flag & VISNAMEDSTREAM) {
+ error = EPERM;
+ goto out;
+ }
+#endif
+
+#if CONFIG_MACF
+ error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
+ if (error)
+ goto out;
+#endif
+
+ /* preflight and authorize attribute changes */
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
+ if (error == EACCES)
+ error = EPERM;
+ goto out;
+ }
+ error = vnode_setattr(vp, &va, ctx);
+
+out:
+ (void)vnode_put(vp);
+ file_drop(uap->fd);
+ return (error);
+}
+
+static int
+getutimes(user_addr_t usrtvp, struct timespec *tsp)
+{
+ int error;
+
+ if (usrtvp == USER_ADDR_NULL) {
+ struct timeval old_tv;
+ /* XXX Y2038 bug because of microtime argument */
+ microtime(&old_tv);
+ TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
+ tsp[1] = tsp[0];
+ } else {
+ if (IS_64BIT_PROCESS(current_proc())) {
+ struct user64_timeval tv[2];
+ error = copyin(usrtvp, (void *)tv, sizeof(tv));
+ if (error)
+ return (error);
+ TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
+ TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
+ } else {
+ struct user32_timeval tv[2];
+ error = copyin(usrtvp, (void *)tv, sizeof(tv));
+ if (error)
+ return (error);
+ TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
+ TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
+ }
+ }
+ return 0;
+}
+
+static int
+setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
+ int nullflag)
+{
+ int error;
+ struct vnode_attr va;
+ kauth_action_t action;
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_access_time, ts[0]);
+ VATTR_SET(&va, va_modify_time, ts[1]);
+ if (nullflag)
+ va.va_vaflags |= VA_UTIMES_NULL;
+
+#if NAMEDSTREAMS
+ /* utimes calls are not allowed for resource forks. */
+ if (vp->v_flag & VISNAMEDSTREAM) {
+ error = EPERM;
+ goto out;
+ }
+#endif
+
+#if CONFIG_MACF
+ error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
+ if (error)
+ goto out;
+#endif
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
+ if (!nullflag && error == EACCES)
+ error = EPERM;
+ goto out;
+ }
+
+ /* since we may not need to auth anything, check here */
+ if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
+ if (!nullflag && error == EACCES)
+ error = EPERM;
+ goto out;
+ }
+ error = vnode_setattr(vp, &va, ctx);
+
+out:
+ return error;
+}
+
+/*
+ * Set the access and modification times of a file.
+ */
+/* ARGSUSED */
+int
+utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
+{
+ struct timespec ts[2];
+ user_addr_t usrtvp;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ /*
+ * AUDIT: Needed to change the order of operations to do the
+ * name lookup first because auditing wants the path.
+ */
+ NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ nameidone(&nd);
+
+ /*
+ * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
+ * the current time instead.
+ */
+ usrtvp = uap->tptr;
+ if ((error = getutimes(usrtvp, ts)) != 0)
+ goto out;
+
+ error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
+
+out:
+ vnode_put(nd.ni_vp);
+ return (error);
+}
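+
+/*
+ * Illustrative note (not part of this change): a NULL tptr takes the
+ * VA_UTIMES_NULL path above, so both timestamps become the current time and
+ * write permission on the file suffices; explicit times require ownership
+ * (or equivalent), which is why the EACCES -> EPERM translation is skipped
+ * in the nullflag case.  From userspace:
+ *
+ *	utimes(path, NULL);	// "touch": atime/mtime = now, write access is enough
+ *
+ *	struct timeval tv[2] = { { 0, 0 }, { 0, 0 } };
+ *	utimes(path, tv);	// explicit times: stricter permission check applies
+ */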
+
+/*
+ * Set the access and modification times of a file.
+ */
+/* ARGSUSED */
+int
+futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
+{
+ struct timespec ts[2];
+ vnode_t vp;
+ user_addr_t usrtvp;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ usrtvp = uap->tptr;
+ if ((error = getutimes(usrtvp, ts)) != 0)
+ return (error);
+ if ((error = file_vnode(uap->fd, &vp)) != 0)
+ return (error);
+ if ((error = vnode_getwithref(vp))) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
+ vnode_put(vp);
+ file_drop(uap->fd);
+ return(error);
+}
+
+/*
+ * Truncate a file given its path name.
+ */
+/* ARGSUSED */
+int
+truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+ kauth_action_t action;
+
+ if (uap->length < 0)
+ return(EINVAL);
+ NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ if ((error = namei(&nd)))
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_data_size, uap->length);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_truncate(ctx, NOCRED, vp);
+ if (error)
+ goto out;
+#endif
+
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
+ goto out;
+ error = vnode_setattr(vp, &va, ctx);
+out:
+ vnode_put(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ */
+/* ARGSUSED */
+int
+ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
+{
+ vfs_context_t ctx = vfs_context_current();
+ struct vnode_attr va;
+ vnode_t vp;
+ struct fileproc *fp;
+ int error;
+ int fd = uap->fd;
+
+ AUDIT_ARG(fd, uap->fd);
+ if (uap->length < 0)
+ return(EINVAL);
+
+ if ( (error = fp_lookup(p,fd,&fp,0)) ) {
+ return(error);
+ }
+
+ switch (FILEGLOB_DTYPE(fp->f_fglob)) {
+ case DTYPE_PSXSHM:
+ error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
+ goto out;
+ case DTYPE_VNODE:
+ break;
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+ vp = (vnode_t)fp->f_fglob->fg_data;
+
+ if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((error = vnode_getwithref(vp)) != 0) {
+ goto out;
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_truncate(ctx,
+ fp->f_fglob->fg_cred, vp);
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_data_size, uap->length);
+ error = vnode_setattr(vp, &va, ctx);
+ (void)vnode_put(vp);
+out:
+ file_drop(fd);
+ return (error);
+}
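+
+/*
+ * Illustrative note (not part of this change): the DTYPE_PSXSHM arm above is
+ * what makes the usual POSIX shared-memory sizing idiom work, since the
+ * descriptor returned by shm_open(2) is not backed by a vnode:
+ *
+ *	int fd = shm_open("/myregion", O_RDWR | O_CREAT, 0600);
+ *	ftruncate(fd, 4096);	// dispatched to pshm_truncate(), not vnode_setattr()
+ */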
+
+
+/*
+ * Sync an open file with synchronized I/O _file_ integrity completion
+ */
+/* ARGSUSED */
+int
+fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
+{
+ __pthread_testcancel(1);
+ return(fsync_common(p, uap, MNT_WAIT));
+}
+
+
+/*
+ * Sync an open file with synchronized I/O _file_ integrity completion
+ *
+ * Notes: This is a legacy support function that does not test for
+ * thread cancellation points.
+ */
+/* ARGSUSED */
+int
+fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
+{
+ return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
+}
+
+
+/*
+ * Sync an open file with synchronized I/O _data_ integrity completion
+ */
+/* ARGSUSED */
+int
+fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
+{
+ __pthread_testcancel(1);
+ return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
+}
+
+
+/*
+ * fsync_common
+ *
+ * Common fsync code to support both synchronized I/O file integrity completion
+ * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
+ *
+ * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
+ * only guarantees that the file data contents are retrievable. If 'flags'
+ * is MNT_WAIT, the caller is requesting file integrity, which additionally
+ * requires that metadata not needed to retrieve the file data contents
+ * (such as atime, mtime, and ctime) also be committed to stable storage.
+ *
+ * Parameters: p The process
+ * uap->fd The descriptor to synchronize
+ * flags The data integrity flags
+ *
+ * Returns: 0 Success
+ * fp_getfvp:EBADF Bad file descriptor
+ * fp_getfvp:ENOTSUP fd does not refer to a vnode
+ * VNOP_FSYNC:??? unspecified
+ *
+ * Notes: We use struct fsync_args because it is a short name, and all
+ * caller argument structures are otherwise identical.
+ */
+static int
+fsync_common(proc_t p, struct fsync_args *uap, int flags)
+{
+ vnode_t vp;
+ struct fileproc *fp;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
+ return (error);
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ error = VNOP_FSYNC(vp, flags, ctx);
+
+#if NAMEDRSRCFORK
+ /* Sync resource fork shadow file if necessary. */
+ if ((error == 0) &&
+ (vp->v_flag & VISNAMEDSTREAM) &&
+ (vp->v_parent != NULLVP) &&
+ vnode_isshadow(vp) &&
+ (fp->f_flags & FP_WRITTEN)) {
+ (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
+ }
+#endif
+
+ (void)vnode_put(vp);
+ file_drop(uap->fd);
+ return (error);
+}
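+
+/*
+ * Illustrative sketch (not part of this change) of how the two flag values
+ * surface to userspace: fsync(2) maps to MNT_WAIT (file integrity) and
+ * fdatasync(2) to MNT_DWAIT (data integrity), so a log writer that only
+ * needs its records to be durable can use the cheaper call:
+ *
+ *	write(fd, buf, len);
+ *	fdatasync(fd);	// data, plus metadata needed to retrieve it, is committed
+ *
+ *	fsync(fd);	// additionally commits atime/mtime/ctime and the like
+ */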
+
+/*
+ * Duplicate files. Source must be a file, target must be a file or
+ * must not exist.
+ *
+ * XXX Copyfile authorisation checking is woefully inadequate, and will not
+ * perform inheritance correctly.
+ */
+/* ARGSUSED */
+int
+copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
+{
+ vnode_t tvp, fvp, tdvp, sdvp;
+ struct nameidata fromnd, tond;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ /* Check that the flags are valid. */
+
+ if (uap->flags & ~CPF_MASK) {
+ return(EINVAL);
+ }
+
+ NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
+ UIO_USERSPACE, uap->from, ctx);
+ if ((error = namei(&fromnd)))
+ return (error);
+ fvp = fromnd.ni_vp;
+
+ NDINIT(&tond, CREATE, OP_LINK,
+ LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+ UIO_USERSPACE, uap->to, ctx);
+ if ((error = namei(&tond))) {
+ goto out1;
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+
+ if (tvp != NULL) {
+ if (!(uap->flags & CPF_OVERWRITE)) {
+ error = EEXIST;
+ goto out;
+ }
+ }
+ if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
+ error = EISDIR;
+ goto out;
+ }
+
+ if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out;
+
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number) then there is nothing to do.
+ * (fixed to have POSIX semantics - CSM 3/2/98)
+ */
+ if (fvp == tvp)
+ error = -1;
+ if (!error)
+ error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
+out:
+ sdvp = tond.ni_startdir;
+ /*
+ * nameidone has to happen before we vnode_put(tdvp)
+ * since it may need to release the fs_nodelock on the tdvp
+ */
+ nameidone(&tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ vnode_put(sdvp);
+out1:
+ vnode_put(fvp);
+
+ if (fromnd.ni_startdir)
+ vnode_put(fromnd.ni_startdir);
+ nameidone(&fromnd);
+
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ */
+/* ARGSUSED */
+int
+rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
+{
+ vnode_t tvp, tdvp;
+ vnode_t fvp, fdvp;
+ struct nameidata *fromnd, *tond;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ int do_retry;
+ int mntrename;
+ int need_event;
+ const char *oname = NULL;
+ char *from_name = NULL, *to_name = NULL;
+ int from_len=0, to_len=0;
+ int holding_mntlock;
+ mount_t locked_mp = NULL;
+ vnode_t oparent = NULLVP;
+#if CONFIG_FSE
+ fse_info from_finfo, to_finfo;
+#endif
+ int from_truncated=0, to_truncated;
+ int batched = 0;
+ struct vnode_attr *fvap, *tvap;
+ int continuing = 0;
+ /* carving out a chunk for structs that are too big to be on stack. */
+ struct {
+ struct nameidata from_node, to_node;
+ struct vnode_attr fv_attr, tv_attr;
+ } * __rename_data;
+ MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+ fromnd = &__rename_data->from_node;
+ tond = &__rename_data->to_node;
+
+ holding_mntlock = 0;
+ do_retry = 0;
+retry:
+ fvp = tvp = NULL;
+ fdvp = tdvp = NULL;
+ fvap = tvap = NULL;
+ mntrename = FALSE;
+
+ NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
+ UIO_USERSPACE, uap->from, ctx);
+ fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
+
+ NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+ UIO_USERSPACE, uap->to, ctx);
+ tond->ni_flag = NAMEI_COMPOUNDRENAME;
+
+continue_lookup:
+ if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+ if ( (error = namei(fromnd)) )
+ goto out1;
+ fdvp = fromnd->ni_dvp;
+ fvp = fromnd->ni_vp;
+
+ if (fvp && fvp->v_type == VDIR)
+ tond->ni_cnd.cn_flags |= WILLBEDIR;
+ }
+
+ if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+ if ( (error = namei(tond)) ) {
+ /*
+ * Translate error code for rename("dir1", "dir2/.").
+ */
+ if (error == EISDIR && fvp->v_type == VDIR)
+ error = EINVAL;
+ goto out1;
+ }
+ tdvp = tond->ni_dvp;
+ tvp = tond->ni_vp;
+ }
+
+ batched = vnode_compound_rename_available(fdvp);
+ if (!fvp) {
+ /*
+ * Claim: this check will never reject a valid rename.
+ * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
+ * Suppose fdvp and tdvp are not on the same mount.
+ * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
+ * then you can't move it to within another dir on the same mountpoint.
+ * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
+ *
+ * If this check passes, then we are safe to pass these vnodes to the same FS.
+ */
+ if (fdvp->v_mount != tdvp->v_mount) {
+ error = EXDEV;
+ goto out1;
+ }
+ goto skipped_lookup;
+ }
+
+ if (!batched) {
+ error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
+ if (error) {
+ if (error == ENOENT) {
+ /*
+ * We encountered a race where after doing the namei, tvp stops
+ * being valid. If so, simply re-drive the rename call from the
+ * top.
+ */
+ do_retry = 1;
+ }
+ goto out1;
+ }
+ }
+
+ /*
+ * If the source and destination are the same (i.e. they're
+ * links to the same vnode) and the target file system is
+ * case sensitive, then there is nothing to do.
+ *
+ * XXX Come back to this.
+ */
+ if (fvp == tvp) {
+ int pathconf_val;
+
+ /*
+ * Note: if _PC_CASE_SENSITIVE selector isn't supported,
+ * then assume that this file system is case sensitive.
+ */
+ if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
+ pathconf_val != 0) {
+ goto out1;
+ }
+ }
+
+ /*
+ * Allow the renaming of mount points.
+ * - target must not exist
+ * - target must reside in the same directory as source
+ * - union mounts cannot be renamed
+ * - "/" cannot be renamed
+ *
+ * XXX Handle this in VFS after a continued lookup (if we missed
+ * in the cache to start off)
+ */
+ if ((fvp->v_flag & VROOT) &&
+ (fvp->v_type == VDIR) &&
+ (tvp == NULL) &&
+ (fvp->v_mountedhere == NULL) &&
+ (fdvp == tdvp) &&
+ ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
+ (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
+ vnode_t coveredvp;
+
+ /* switch fvp to the covered vnode */
+ coveredvp = fvp->v_mount->mnt_vnodecovered;
+ if ( (vnode_getwithref(coveredvp)) ) {
+ error = ENOENT;
+ goto out1;
+ }
+ vnode_put(fvp);
+
+ fvp = coveredvp;
+ mntrename = TRUE;
+ }
+ /*
+ * Check for cross-device rename.
+ */
+ if ((fvp->v_mount != tdvp->v_mount) ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ goto out1;
+ }
+
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number) then there is nothing to do...
+ * EXCEPT if the underlying file system supports case
+ * insensitivity and is case preserving. In this case
+ * the file system needs to handle the special case of
+ * getting the same vnode as target (fvp) and source (tvp).
+ *
+ * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
+ * and _PC_CASE_PRESERVING can have this exception, and they need to
+ * handle the special case of getting the same vnode as target and
+ * source. NOTE: Then the target is unlocked going into vnop_rename,
+ * so as not to cause locking problems. There is a single reference on tvp.
+ *
+ * NOTE - that fvp == tvp also occurs if they are hard linked and
+ * that correct behaviour then is just to return success without doing
+ * anything.
+ *
+ * XXX filesystem should take care of this itself, perhaps...
+ */
+ if (fvp == tvp && fdvp == tdvp) {
+ if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
+ !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
+ fromnd->ni_cnd.cn_namelen)) {
+ goto out1;
+ }
+ }
+
+ if (holding_mntlock && fvp->v_mount != locked_mp) {
+ /*
+ * we're holding a reference and lock
+ * on locked_mp, but it no longer matches
+ * what we want to do... so drop our hold
+ */
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (tdvp != fdvp && fvp->v_type == VDIR) {
+ /*
+ * serialize renames that re-shape
+ * the tree... if holding_mntlock is
+ * set, then we're ready to go...
+ * otherwise we
+ * first need to drop the iocounts
+ * we picked up, second take the
+ * lock to serialize the access,
+ * then finally start the lookup
+ * process over with the lock held
+ */
+ if (!holding_mntlock) {
+ /*
+ * need to grab a reference on
+ * the mount point before we
+ * drop all the iocounts... once
+ * the iocounts are gone, the mount
+ * could follow
+ */
+ locked_mp = fvp->v_mount;
+ mount_ref(locked_mp, 0);
+
+ /*
+ * nameidone has to happen before we vnode_put(tvp)
+ * since it may need to release the fs_nodelock on the tvp
+ */
+ nameidone(tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+
+ /*
+ * nameidone has to happen before we vnode_put(fdvp)
+ * since it may need to release the fs_nodelock on the fvp
+ */
+ nameidone(fromnd);
+
+ vnode_put(fvp);
+ vnode_put(fdvp);
+
+ mount_lock_renames(locked_mp);
+ holding_mntlock = 1;
+
+ goto retry;
+ }
+ } else {
+ /*
+ * when we dropped the iocounts to take
+ * the lock, we allowed the identity of
+ * the various vnodes to change... if they did,
+ * we may no longer be dealing with a rename
+ * that reshapes the tree... once we're holding
+ * the iocounts, the vnodes can't change type
+ * so we're free to drop the lock at this point
+ * and continue on
+ */
+ if (holding_mntlock) {
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ }
+
+ // save these off so we can later verify that fvp is the same
+ oname = fvp->v_name;
+ oparent = fvp->v_parent;
+
+skipped_lookup:
+#if CONFIG_FSE
+ need_event = need_fsevent(FSE_RENAME, fdvp);
+ if (need_event) {
+ if (fvp) {
+ get_fse_info(fvp, &from_finfo, ctx);
+ } else {
+ error = vfs_get_notify_attributes(&__rename_data->fv_attr);
+ if (error) {
+ goto out1;
+ }
+
+ fvap = &__rename_data->fv_attr;
+ }
+
+ if (tvp) {
+ get_fse_info(tvp, &to_finfo, ctx);
+ } else if (batched) {
+ error = vfs_get_notify_attributes(&__rename_data->tv_attr);
+ if (error) {
+ goto out1;
+ }
+
+ tvap = &__rename_data->tv_attr;
+ }
+ }
+#else
+ need_event = 0;
+#endif /* CONFIG_FSE */
+
+ if (need_event || kauth_authorize_fileop_has_listeners()) {
+ if (from_name == NULL) {
+ GET_PATH(from_name);
+ if (from_name == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
+
+ if (to_name == NULL) {
+ GET_PATH(to_name);
+ if (to_name == NULL) {
+ error = ENOMEM;
+ goto out1;
+ }
+ }
+
+ to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
+ }
+
+ error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
+ tdvp, &tvp, &tond->ni_cnd, tvap,
+ 0, ctx);
+
+ if (holding_mntlock) {
+ /*
+ * we can drop our serialization
+ * lock now
+ */
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (error) {
+ if (error == EKEEPLOOKING) {
+ if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+ panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
+ }
+ }
+
+ fromnd->ni_vp = fvp;
+ tond->ni_vp = tvp;
+
+ goto continue_lookup;
+ }
+
+ /*
+ * We may encounter a race in the VNOP where the destination didn't
+ * exist when we did the namei, but it does by the time we go and
+ * try to create the entry. In this case, we should re-drive this rename
+ * call from the top again. Currently, only HFS bubbles out ERECYCLE,
+ * but other filesystems susceptible to this race could return it, too.
+ */
+ if (error == ERECYCLE) {
+ do_retry = 1;
+ }
+
+ goto out1;
+ }
+
+ /* call out to allow 3rd party notification of rename.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_RENAME,
+ (uintptr_t)from_name, (uintptr_t)to_name);
+
+#if CONFIG_FSE
+ if (from_name != NULL && to_name != NULL) {
+ if (from_truncated || to_truncated) {
+ // set it here since only the from_finfo gets reported up to user space
+ from_finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+
+ if (tvap && tvp) {
+ vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
+ }
+ if (fvap) {
+ vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
+ }
+
+ if (tvp) {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_FINFO, &to_finfo,
+ FSE_ARG_DONE);
+ } else {
+ add_fsevent(FSE_RENAME, ctx,
+ FSE_ARG_STRING, from_len, from_name,
+ FSE_ARG_FINFO, &from_finfo,
+ FSE_ARG_STRING, to_len, to_name,
+ FSE_ARG_DONE);
+ }
+ }
+#endif /* CONFIG_FSE */
+
+ /*
+ * update filesystem's mount point data
+ */
+ if (mntrename) {
+ char *cp, *pathend, *mpname;
+ char * tobuf;
+ struct mount *mp;
+ int maxlen;
+ size_t len = 0;
+
+ mp = fvp->v_mountedhere;
+
+ if (vfs_busy(mp, LK_NOWAIT)) {
+ error = EBUSY;
+ goto out1;
+ }
+ MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+
+ error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
+ if (!error) {
+ /* find current mount point prefix */
+ pathend = &mp->mnt_vfsstat.f_mntonname[0];
+ for (cp = pathend; *cp != '\0'; ++cp) {
+ if (*cp == '/')
+ pathend = cp + 1;
+ }
+ /* find last component of target name */
+ for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
+ if (*cp == '/')
+ mpname = cp + 1;
+ }
+ /* append name to prefix */
+ maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
+ bzero(pathend, maxlen);
+ strlcpy(pathend, mpname, maxlen);
+ }
+ FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
+
+ vfs_unbusy(mp);
+ }
+ /*
+ * fix up name & parent pointers. note that we first
+ * check that fvp has the same name/parent pointers it
+ * had before the rename call... this is a 'weak' check
+ * at best...
+ *
+ * XXX oparent and oname may not be set in the compound vnop case
+ */
+ if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
+ int update_flags;
+
+ update_flags = VNODE_UPDATE_NAME;
+
+ if (fdvp != tdvp)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
+ }
+out1:
+ if (to_name != NULL) {
+ RELEASE_PATH(to_name);
+ to_name = NULL;
+ }
+ if (from_name != NULL) {
+ RELEASE_PATH(from_name);
+ from_name = NULL;
+ }
+ if (holding_mntlock) {
+ mount_unlock_renames(locked_mp);
+ mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
+ }
+ if (tdvp) {
+ /*
+ * nameidone has to happen before we vnode_put(tdvp)
+ * since it may need to release the fs_nodelock on the tdvp
+ */
+ nameidone(tond);
+
+ if (tvp)
+ vnode_put(tvp);
+ vnode_put(tdvp);
+ }
+ if (fdvp) {
+ /*
+ * nameidone has to happen before we vnode_put(fdvp)
+ * since it may need to release the fs_nodelock on the fdvp
+ */
+ nameidone(fromnd);
+
+ if (fvp)
+ vnode_put(fvp);
+ vnode_put(fdvp);
+ }
+
+
+ /*
+ * If things changed after we did the namei, then we will re-drive
+ * this rename call from the top.
+ */
+ if (do_retry) {
+ do_retry = 0;
+ goto retry;
+ }
+
+ FREE(__rename_data, M_TEMP);
+ return (error);
+}
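+
+/*
+ * Illustrative example (not part of this change) of the mntrename splice
+ * above: with f_mntonname "/Volumes/Old" and uap->to copied in as
+ * "/Volumes/New", pathend is left just past the final '/' of the mount-on
+ * name, mpname points at "New", and the strlcpy() produces f_mntonname
+ * "/Volumes/New".  Only the last component of the mount-on name is
+ * replaced; its directory prefix is preserved.
+ */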
+
+/*
+ * Make a directory file.
+ *
+ * Returns: 0 Success
+ * EEXIST
+ * namei:???
+ * vnode_authorize:???
+ * vn_create:???
+ */
+/* ARGSUSED */
+static int
+mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
+{
+ vnode_t vp, dvp;
+ int error;
+ int update_flags = 0;
+ int batched;
+ struct nameidata nd;
+
+ AUDIT_ARG(mode, vap->va_mode);
+ NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
+ path, ctx);
+ nd.ni_cnd.cn_flags |= WILLBEDIR;
+ nd.ni_flag = NAMEI_COMPOUNDMKDIR;
+
+continue_lookup:
+ error = namei(&nd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+
+ batched = vnode_compound_mkdir_available(dvp);
+
+ VATTR_SET(vap, va_type, VDIR);
+
+ /*
+ * XXX
+ * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
+ * only get EXISTS or EISDIR for existing path components, and not that it could see
+ * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
+ * it will fail in a spurious manner. Need to figure out if this is valid behavior.
+ */
+ if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
+ if (error == EACCES || error == EPERM) {
+ int error2;
+
+ nameidone(&nd);
+ vnode_put(dvp);
+ dvp = NULLVP;
+
+ /*
+ * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
+ * rather than EACCES if the target exists.
+ */
+ NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
+ path, ctx);
+ error2 = namei(&nd);
+ if (error2) {
+ goto out;
+ } else {
+ vp = nd.ni_vp;
+ error = EEXIST;
+ goto out;
+ }
+ }
+
+ goto out;
+ }
+
+ /*
+ * make the directory
+ */
+ if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
+ if (error == EKEEPLOOKING) {
+ nd.ni_vp = vp;
+ goto continue_lookup;
+ }
+
+ goto out;
+ }
+
+ // Make sure the name & parent pointers are hooked up
+ if (vp->v_name == NULL)
+ update_flags |= VNODE_UPDATE_NAME;
+ if (vp->v_parent == NULLVP)
+ update_flags |= VNODE_UPDATE_PARENT;
+
+ if (update_flags)
+ vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+ add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
+#endif
+
+out:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ if (dvp)
+ vnode_put(dvp);
+
+ return (error);
+}
+
+/*
+ * mkdir_extended: Create a directory; with extended security (ACL).
+ *
+ * Parameters: p Process requesting to create the directory
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of directory to create
+ * uap->mode Access permissions to set
+ * uap->xsecurity ACL to set
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
+{
+ int ciferror;
+ kauth_filesec_t xsecdst;
+ struct vnode_attr va;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ xsecdst = NULL;
+ if ((uap->xsecurity != USER_ADDR_NULL) &&
+ ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
+ return ciferror;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+ if (xsecdst != NULL)
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+ ciferror = mkdir1(vfs_context_current(), uap->path, &va);
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+ return ciferror;
+}
+
+int
+mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+
+ return(mkdir1(vfs_context_current(), uap->path, &va));
+}
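+
+/*
+ * Illustrative example (not part of this change) of the effective-mode
+ * computation both mkdir entry points perform before calling mkdir1():
+ *
+ *	requested mode		0777
+ *	& ACCESSPERMS		0777	(strip anything outside rwxrwxrwx)
+ *	& ~fd_cmask		0755	(assuming the process umask is 022)
+ *
+ * i.e. mkdir("dir", 0777) under umask(022) yields a 0755 directory; the
+ * umask is applied here in the kernel rather than in libc.
+ */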
+
+/*
+ * Remove a directory file.
+ */
+/* ARGSUSED */
+int
+rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp, dvp;
+ int error;
+ struct nameidata nd;
+ char *path = NULL;
+ int len=0;
+ int has_listeners = 0;
+ int need_event = 0;
+ int truncated = 0;
+ vfs_context_t ctx = vfs_context_current();
+#if CONFIG_FSE
+ struct vnode_attr va;
+#endif /* CONFIG_FSE */
+ struct vnode_attr *vap = NULL;
+ int batched;
+
+ int restart_flag;
+
+ /*
+ * This loop exists to restart rmdir in the unlikely case that two
+ * processes are simultaneously trying to remove the same directory
+ * containing orphaned AppleDouble files.
+ */
+ do {
+ NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ nd.ni_flag = NAMEI_COMPOUNDRMDIR;
+continue_lookup:
+ restart_flag = 0;
+ vap = NULL;
+
+ error = namei(&nd);
+ if (error)
+ return (error);
+
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ if (vp) {
+ batched = vnode_compound_rmdir_available(vp);
+
+ if (vp->v_flag & VROOT) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ error = EBUSY;
+ goto out;
+ }
+
+ /*
+ * Removed a check here; we used to abort if vp's vid
+ * was not the same as what we'd seen the last time around.
+ * I do not think that check was valid, because if we retry
+ * and all dirents are gone, the directory could legitimately
+ * be recycled but still be present in a situation where we would
+ * have had permission to delete. Therefore, we won't make
+ * an effort to preserve that check now that we may not have a
+ * vp here.
+ */
+
+ if (!batched) {
+ error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
+ if (error) {
+ goto out;
+ }
+ }
+ } else {
+ batched = 1;
+
+ if (!vnode_compound_rmdir_available(dvp)) {
+ panic("No error, but no compound rmdir?");
+ }
+ }
+
+#if CONFIG_FSE
+ fse_info finfo;
+
+ need_event = need_fsevent(FSE_DELETE, dvp);
+ if (need_event) {
+ if (!batched) {
+ get_fse_info(vp, &finfo, ctx);
+ } else {
+ error = vfs_get_notify_attributes(&va);
+ if (error) {
+ goto out;
+ }
+
+ vap = &va;
+ }
+ }
+#endif
+ has_listeners = kauth_authorize_fileop_has_listeners();
+ if (need_event || has_listeners) {
+ if (path == NULL) {
+ GET_PATH(path);
+ if (path == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+ }
+
+ len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
+#if CONFIG_FSE
+ if (truncated) {
+ finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+#endif
+ }
+
+ error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+ nd.ni_vp = vp;
+ if (vp == NULLVP) {
+ /* Couldn't find a vnode */
+ goto out;
+ }
+
+ if (error == EKEEPLOOKING) {
+ goto continue_lookup;
+ }
+#if CONFIG_APPLEDOUBLE
+ /*
+ * Special case to remove orphaned AppleDouble
+ * files. I don't like putting this in the kernel,
+ * but carbon does not like putting this in carbon either,
+ * so here we are.
+ */
+ if (error == ENOTEMPTY) {
+ error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
+ if (error == EBUSY) {
+ goto out;
+ }
+
+
+ /*
+ * Assuming everything went well, we will try the RMDIR again
+ */
+ if (!error)
+ error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+ }
+#endif /* CONFIG_APPLEDOUBLE */
+ /*
+ * Call out to allow 3rd party notification of delete.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ if (!error) {
+ if (has_listeners) {
+ kauth_authorize_fileop(vfs_context_ucred(ctx),
+ KAUTH_FILEOP_DELETE,
+ (uintptr_t)vp,
+ (uintptr_t)path);
+ }
+
+ if (vp->v_flag & VISHARDLINK) {
+ // see the comment in unlink1() about why we update
+ // the parent of a hard link when it is removed
+ vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+ }
+
+#if CONFIG_FSE
+ if (need_event) {
+ if (vap) {
+ vnode_get_fse_info_from_vap(vp, &finfo, vap);
+ }
+ add_fsevent(FSE_DELETE, ctx,
+ FSE_ARG_STRING, len, path,
+ FSE_ARG_FINFO, &finfo,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+
+out:
+ if (path != NULL) {
+ RELEASE_PATH(path);
+ path = NULL;
+ }
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+ vnode_put(dvp);
+
+ if (vp)
+ vnode_put(vp);
+
+ if (restart_flag == 0) {
+ wakeup_one((caddr_t)vp);
+ return (error);
+ }
+ tsleep(vp, PVFS, "rm AD", 1);
+
+ } while (restart_flag != 0);
+
+ return (error);
+
+}
+
+/* Get direntry length padded to 8 byte alignment */
+#define DIRENT64_LEN(namlen) \
+ ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
+
+static errno_t
+vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
+ int *numdirent, vfs_context_t ctxp)
+{
+ /* Check if fs natively supports VNODE_READDIR_EXTENDED */
+ if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
+ ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
+ return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
+ } else {
+ size_t bufsize;
+ void * bufptr;
+ uio_t auio;
+ struct direntry *entry64;
+ struct dirent *dep;
+ int bytesread;
+ int error;
+
+ /*
+ * Our kernel buffer needs to be smaller since re-packing
+ * will expand each dirent. The worst case (when the name
+ * length is 3) corresponds to a struct direntry size of 32
+ * bytes (8-byte aligned) and a struct dirent size of 12 bytes
+ * (4-byte aligned). So having a buffer that is 3/8 the size
+ * will prevent us from reading more than we can pack.
+ *
+ * Since this buffer is wired memory, we will limit the
+ * buffer size to a maximum of 32K. We would really like to
+ * use 32K in the MIN(), but we use magic number 87371 to
+ * prevent uio_resid() * 3 / 8 from overflowing.
+ */
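+ /*
+ * Worked numbers (illustrative, assuming MAXPATHLEN of 1024 and a struct
+ * direntry of 1048 bytes): DIRENT64_LEN(3) = (1048 + 3 - 1023 + 7) & ~7
+ * = 32 versus a 12-byte packed dirent, which is where the 12/32 = 3/8
+ * ratio below comes from; and 3 * 87371 / 8 = 32764, keeping bufsize just
+ * under the 32K cap.
+ */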
+ bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
+ MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
+ if (bufptr == NULL) {
+ return ENOMEM;
+ }
+
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, (uintptr_t)bufptr, bufsize);
+ auio->uio_offset = uio->uio_offset;
+
+ error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
+
+ dep = (struct dirent *)bufptr;
+ bytesread = bufsize - uio_resid(auio);
+
+ MALLOC(entry64, struct direntry *, sizeof(struct direntry),
+ M_TEMP, M_WAITOK);
+ /*
+ * Convert all the entries and copy them out to user's buffer.
+ */
+ while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
+ size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
+
+ bzero(entry64, enbufsize);
+ /* Convert a dirent to a dirent64. */
+ entry64->d_ino = dep->d_ino;
+ entry64->d_seekoff = 0;
+ entry64->d_reclen = enbufsize;
+ entry64->d_namlen = dep->d_namlen;
+ entry64->d_type = dep->d_type;
+ bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
+
+ /* Move to next entry. */
+ dep = (struct dirent *)((char *)dep + dep->d_reclen);
+
+ /* Copy entry64 to user's buffer. */
+ error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
+ }
+
+ /* Update the real offset using the offset we got from VNOP_READDIR. */
+ if (error == 0) {
+ uio->uio_offset = auio->uio_offset;
+ }
+ uio_free(auio);
+ FREE(bufptr, M_TEMP);
+ FREE(entry64, M_TEMP);
+ return (error);
+ }
+}
+
+#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+static int
+getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
+ off_t *offset, int flags)
+{
+ vnode_t vp;
+ struct vfs_context context = *vfs_context_current(); /* local copy */
+ struct fileproc *fp;
+ uio_t auio;
+ int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ off_t loff;
+ int error, eofflag, numdirent;
+ char uio_buf[ UIO_SIZEOF(1) ];
+
+ error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
+ if (error) {
+ return (error);
+ }
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+ if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
+ bufsize = GETDIRENTRIES_MAXBUFSIZE;
+
+#if CONFIG_MACF
+ error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
+ if (error)
+ goto out;
+#endif
+ if ( (error = vnode_getwithref(vp)) ) {
+ goto out;
+ }
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+unionread:
+ if (vp->v_type != VDIR) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_readdir(&context, vp);
+ if (error != 0) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif /* MAC */
+
+ loff = fp->f_fglob->fg_offset;
+ auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, bufp, bufsize);
+
+ if (flags & VNODE_READDIR_EXTENDED) {
+ error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
+ fp->f_fglob->fg_offset = uio_offset(auio);
+ } else {
+ error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
+ fp->f_fglob->fg_offset = uio_offset(auio);
+ }
+ if (error) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+
+ if ((user_ssize_t)bufsize == uio_resid(auio)){
+ if (union_dircheckp) {
+ error = union_dircheckp(&vp, fp, &context);
+ if (error == -1)
+ goto unionread;
+ if (error)
+ goto out;
+ }
+
+ if ((vp->v_mount->mnt_flag & MNT_UNION)) {
+ struct vnode *tvp = vp;
+ if (lookup_traverse_union(tvp, &vp, &context) == 0) {
+ vnode_ref(vp);
+ fp->f_fglob->fg_data = (caddr_t) vp;
+ fp->f_fglob->fg_offset = 0;
+ vnode_rele(tvp);
+ vnode_put(tvp);
+ goto unionread;
+ }
+ vp = tvp;
+ }
+ }
+
+ vnode_put(vp);
+ if (offset) {
+ *offset = loff;
+ }
+
+ *bytesread = bufsize - uio_resid(auio);
+out:
+ file_drop(fd);
+ return (error);
+}
+
+
+int
+getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
+{
+ off_t offset;
+ ssize_t bytesread;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
+
+ if (error == 0) {
+ if (proc_is64bit(p)) {
+ user64_long_t base = (user64_long_t)offset;
+ error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
+ } else {
+ user32_long_t base = (user32_long_t)offset;
+ error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
+ }
+ *retval = bytesread;
+ }
+ return (error);
+}
+
+int
+getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
+{
+ off_t offset;
+ ssize_t bytesread;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+ error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
+
+ if (error == 0) {
+ *retval = bytesread;
+ error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
+ }
+ return (error);
+}
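+
+/*
+ * Illustrative sketch (not part of this change) of consuming the buffer
+ * filled in the VNODE_READDIR_EXTENDED case above: entries are packed
+ * struct direntry records advanced by d_reclen (readdir(3) performs an
+ * equivalent walk on the caller's behalf).  Assuming a raw wrapper named
+ * getdirentries64_syscall() purely for illustration:
+ *
+ *	char buf[8192];
+ *	off_t pos;
+ *	ssize_t n = getdirentries64_syscall(fd, buf, sizeof(buf), &pos);
+ *	for (char *p = buf; n > 0 && p < buf + n; ) {
+ *		struct direntry *de = (struct direntry *)p;
+ *		// use de->d_ino, de->d_type, de->d_name ...
+ *		p += de->d_reclen;
+ *	}
+ */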
+
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ * XXX implement xsecurity
+ */
+#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
+static int
+umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
+{
+ struct filedesc *fdp;
+
+ AUDIT_ARG(mask, newmask);
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ *retval = fdp->fd_cmask;
+ fdp->fd_cmask = newmask & ALLPERMS;
+ proc_fdunlock(p);
+ return (0);
+}
+
+/*
+ * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
+ *
+ * Parameters: p Process requesting to set the umask
+ * uap User argument descriptor (see below)
+ * retval umask of the process (parameter p)
+ *
+ * Indirect: uap->newmask umask to set
+ * uap->xsecurity ACL to set
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
+{
+ int ciferror;
+ kauth_filesec_t xsecdst;
+
+ xsecdst = KAUTH_FILESEC_NONE;
+ if (uap->xsecurity != USER_ADDR_NULL) {
+ if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return ciferror;
+ }
+
+ ciferror = umask1(p, uap->newmask, xsecdst, retval);
+
+ if (xsecdst != KAUTH_FILESEC_NONE)
+ kauth_filesec_free(xsecdst);
+ return ciferror;
+}
+
+int
+umask(proc_t p, struct umask_args *uap, int32_t *retval)
+{
+ return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
+}
+
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ */
+/* ARGSUSED */
+int
+revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
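+ /* revoke() only applies to character and block special files */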
+ if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
+ error = ENOTSUP;
+ goto out;
+ }
+
+ if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
+ error = EBUSY;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_revoke(ctx, vp);
+ if (error)
+ goto out;
+#endif
+
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_uid);
+ if ((error = vnode_getattr(vp, &va, ctx)))
+ goto out;
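+ /* the caller must own the device or be the superuser */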
+ if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
+ (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
+ goto out;
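+ /* only issue the revoke if the vnode is actually in use or aliased */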
+ if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
+ VNOP_REVOKE(vp, REVOKEALL, ctx);
+out:
+ vnode_put(vp);
+ return (error);
+}
+
+
+/*
+ * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
+ * The following system calls are designed to support features
+ * which are specific to the HFS & HFS Plus volume formats
+ */
+
+
+/*
+ * Obtain attribute information on objects in a directory while enumerating
+ * the directory.
+ */
+/* ARGSUSED */
+int
+getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
+{
+ vnode_t vp;
+ struct fileproc *fp;
+ uio_t auio = NULL;
+ int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ uint32_t count, savecount;
+ uint32_t newstate;
+ int error, eofflag;
+ uint32_t loff;
+ struct attrlist attributelist;
+ vfs_context_t ctx = vfs_context_current();
+ int fd = uap->fd;
+ char uio_buf[ UIO_SIZEOF(1) ];
+ kauth_action_t action;
+
+ AUDIT_ARG(fd, fd);
+
+ /* Get the attributes into kernel space */
+ if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
+ return(error);
+ }
+ if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
+ return(error);
+ }
+ savecount = count;
+ if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
+ return (error);
+ }
+ if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+ error = EBADF;
+ goto out;
+ }
+
+
+#if CONFIG_MACF
+ error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+ fp->f_fglob);
+ if (error)
+ goto out;
+#endif
+
+
+ if ( (error = vnode_getwithref(vp)) )
+ goto out;
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+unionread:
+ if (vp->v_type != VDIR) {
+ (void)vnode_put(vp);
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_readdir(ctx, vp);
+ if (error != 0) {
+ (void)vnode_put(vp);
+ goto out;
+ }
+#endif /* MAC */
+
+ /* set up the uio structure which will contain the user's return buffer */
+ loff = fp->f_fglob->fg_offset;
+ auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, uap->buffer, uap->buffersize);
+
+ /*
+ * If the only item requested is file names, we can let that past with
+ * just LIST_DIRECTORY. If they want any other attributes, that means
+ * they need SEARCH as well.
+ */
+ action = KAUTH_VNODE_LIST_DIRECTORY;
+ if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
+ attributelist.fileattr || attributelist.dirattr)
+ action |= KAUTH_VNODE_SEARCH;
+
+ if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
+
+ /* Believe it or not, uap->options only has 32 bits of valid
+ * info, so truncate before extending it again. */
+
+ error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
+ (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
+ }
+
+ if (error) {
+ (void) vnode_put(vp);
+ goto out;
+ }
+
+ /*
+ * If we've got the last entry of a directory in a union mount
+ * then reset the eofflag and pretend there's still more to come.
+ * The next call will again set eofflag and the buffer will be empty,
+ * so traverse to the underlying directory and do the directory
+ * read there.
+ */
+ if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
+ if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
+ eofflag = 0;
+ } else { // Empty buffer
+ struct vnode *tvp = vp;
+ if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
+ vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
+ fp->f_fglob->fg_data = (caddr_t) vp;
+ fp->f_fglob->fg_offset = 0; // reset index for new dir
+ count = savecount;
+ vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
+ vnode_put(tvp);
+ goto unionread;
+ }
+ vp = tvp;
+ }
+ }
+
+ (void)vnode_put(vp);
+
+ if (error)
+ goto out;
+ fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
+
+ if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
+ goto out;
+ if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
+ goto out;
+ if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
+ goto out;
+
+ *retval = eofflag; /* similar to getdirentries */
+ error = 0;
+out:
+ file_drop(fd);
+ return (error); /* return any error set earlier; retval is 0 or 1 (eofflag) */
+
+} /* end of getdirentriesattr system call */
+
+/*
+* Exchange data between two files
+*/
+
+/* ARGSUSED */
+int
+exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
+{
+
+ struct nameidata fnd, snd;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t fvp;
+ vnode_t svp;
+ int error;
+ u_int32_t nameiflags;
+ char *fpath = NULL;
+ char *spath = NULL;
+ int flen = 0, slen = 0;
+ int from_truncated = 0, to_truncated = 0;
+#if CONFIG_FSE
+ fse_info f_finfo, s_finfo;
+#endif
+
+ nameiflags = 0;
+ if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+
+ NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path1, ctx);
+
+ error = namei(&fnd);
+ if (error)
+ goto out2;
+
+ nameidone(&fnd);
+ fvp = fnd.ni_vp;
+
+ NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
+ UIO_USERSPACE, uap->path2, ctx);
+
+ error = namei(&snd);
+ if (error) {
+ vnode_put(fvp);
+ goto out2;
+ }
+ nameidone(&snd);
+ svp = snd.ni_vp;
+
+ /*
+ * if the files are the same, return EINVAL
+ */
+ if (svp == fvp) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * if the files are on different volumes, return an error
+ */
+ if (svp->v_mount != fvp->v_mount) {
+ error = EXDEV;
+ goto out;
+ }
+
+ /* If they're not regular files, return an error */
+ if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
+ error = EINVAL;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_exchangedata(ctx,
+ fvp, svp);
+ if (error)
+ goto out;
+#endif
+ if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
+ ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
+ goto out;
+
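+ /* only build the paths if an fsevent or a fileop listener will actually consume them */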
+ if (
+#if CONFIG_FSE
+ need_fsevent(FSE_EXCHANGE, fvp) ||
+#endif
+ kauth_authorize_fileop_has_listeners()) {
+ GET_PATH(fpath);
+ GET_PATH(spath);
+ if (fpath == NULL || spath == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
+ slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
+
+#if CONFIG_FSE
+ get_fse_info(fvp, &f_finfo, ctx);
+ get_fse_info(svp, &s_finfo, ctx);
+ if (from_truncated || to_truncated) {
+ // set it here since only the f_finfo gets reported up to user space
+ f_finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+#endif
+ }
+ /* Ok, make the call */
+ error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
+
+ if (error == 0) {
+ const char *tmpname;
+
+ if (fpath != NULL && spath != NULL) {
+ /* call out to allow 3rd party notification of exchangedata.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
+ (uintptr_t)fpath, (uintptr_t)spath);
+ }
+ name_cache_lock();
+
+ tmpname = fvp->v_name;
+ fvp->v_name = svp->v_name;
+ svp->v_name = tmpname;
+
+ if (fvp->v_parent != svp->v_parent) {
+ vnode_t tmp;
+
+ tmp = fvp->v_parent;
+ fvp->v_parent = svp->v_parent;
+ svp->v_parent = tmp;
+ }
+ name_cache_unlock();
+
+#if CONFIG_FSE
+ if (fpath != NULL && spath != NULL) {
+ add_fsevent(FSE_EXCHANGE, ctx,
+ FSE_ARG_STRING, flen, fpath,
+ FSE_ARG_FINFO, &f_finfo,
+ FSE_ARG_STRING, slen, spath,
+ FSE_ARG_FINFO, &s_finfo,
+ FSE_ARG_DONE);
+ }
+#endif
+ }
+
+out:
+ if (fpath != NULL)
+ RELEASE_PATH(fpath);
+ if (spath != NULL)
+ RELEASE_PATH(spath);
+ vnode_put(svp);
+ vnode_put(fvp);
+out2:
+ return (error);
+}
+
+/*
+ * Return (in MB) the amount of free space on the given vnode's volume.
+ */
+uint32_t freespace_mb(vnode_t vp);
+
+uint32_t
+freespace_mb(vnode_t vp)
+{
+ vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
+ return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
+ vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
+}
+
+#if CONFIG_SEARCHFS
+
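+/*
+ * searchfs: search a volume for objects matching the caller-supplied
+ * attribute criteria. The criteria and return attrlist are copied in from
+ * the user's fssearchblock, the search itself is performed by the file
+ * system via VNOP_SEARCHFS, and search state is carried across calls in
+ * uap->state.
+ */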
+/* ARGSUSED */
+
+int
+searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp, tvp;
+ int i, error=0;
+ int fserror = 0;
+ struct nameidata nd;
+ struct user64_fssearchblock searchblock;
+ struct searchstate *state;
+ struct attrlist *returnattrs;
+ struct timeval timelimit;
+ void *searchparams1,*searchparams2;
+ uio_t auio = NULL;
+ int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ uint32_t nummatches;
+ int mallocsize;
+ uint32_t nameiflags;
+ vfs_context_t ctx = vfs_context_current();
+ char uio_buf[ UIO_SIZEOF(1) ];
+
+ /* Start by copying in fsearchblock parameter list */
+ if (IS_64BIT_PROCESS(p)) {
+ error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
+ timelimit.tv_sec = searchblock.timelimit.tv_sec;
+ timelimit.tv_usec = searchblock.timelimit.tv_usec;
+ }
+ else {
+ struct user32_fssearchblock tmp_searchblock;
+
+ error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
+ // munge into 64-bit version
+ searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
+ searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
+ searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
+ searchblock.maxmatches = tmp_searchblock.maxmatches;
+ /*
+ * These casts are safe: tv_sec is promoted from a 32-bit long to a 64-bit
+ * long if necessary, and tv_usec is already a signed 32-bit int.
+ */
+ timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
+ timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
+ searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
+ searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
+ searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
+ searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
+ searchblock.searchattrs = tmp_searchblock.searchattrs;
+ }
+ if (error)
+ return(error);
+
+ /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. */
+ if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
+ searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
+ return(EINVAL);
+
+ /*
+ * Now malloc a big bunch of space to hold the search parameters, the
+ * attrlists and the search state. It all has to go into local memory and
+ * it's not that big, so we might as well put it all together. Searchparams1
+ * shall be first, so we might as well use that to hold the base address of
+ * the allocated block.
+ */
+
+ mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
+ sizeof(struct attrlist) + sizeof(struct searchstate);
+
+ MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
+
+ /* Now set up the various pointers to the correct place in our newly allocated memory */
+
+ searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
+ returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
+ state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
+
+ /* Now copy in the search parameters, the return attrlist, and the search state using our local pointers. */
+
+ if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
+ goto freeandexit;
+
+ if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
+ goto freeandexit;
+
+ if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
+ goto freeandexit;
+
+ if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
+ goto freeandexit;
+
+ /*
+ * When searching a union mount, need to set the
+ * start flag at the first call on each layer to
+ * reset state for the new volume.
+ */
+ if (uap->options & SRCHFS_START)
+ state->ss_union_layer = 0;
+ else
+ uap->options |= state->ss_union_flags;
+ state->ss_union_flags = 0;
+
+ /*
+ * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
+ * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
+ * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
+ * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
+ * validate the user-supplied data offset of the attrreference_t, we'll do it here.
+ */
+
+ if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
+ attrreference_t* string_ref;
+ u_int32_t* start_length;
+ user64_size_t param_length;
+
+ /* validate searchparams1 */
+ param_length = searchblock.sizeofsearchparams1;
+ /* skip the word that specifies length of the buffer */
+ start_length = (u_int32_t *) searchparams1;
+ start_length = start_length + 1;
+ string_ref = (attrreference_t *) start_length;
+
+ /* ensure there are no negative or overly large offsets */
+ if (string_ref->attr_dataoffset < 0 ) {
+ error = EINVAL;
+ goto freeandexit;
+ }
+ if (string_ref->attr_length > MAXPATHLEN) {
+ error = EINVAL;
+ goto freeandexit;
+ }
+
+ /* Check for pointer overflow in the string ref */
+ if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
+ error = EINVAL;
+ goto freeandexit;
+ }
+
+ if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
+ error = EINVAL;
+ goto freeandexit;
+ }
+ if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
+ error = EINVAL;
+ goto freeandexit;
+ }
+ }
+
+ /* set up the uio structure which will contain the user's return buffer */
+ auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+ uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
+
+ nameiflags = 0;
+ if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+ NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+
+ error = namei(&nd);
+ if (error)
+ goto freeandexit;
+ vp = nd.ni_vp;
+ nameidone(&nd);
+
+ /*
+ * Switch to the root vnode for the volume
+ */
+ error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
+ if (error)
+ goto freeandexit;
+ vnode_put(vp);
+ vp = tvp;
+
+ /*
+ * If it's a union mount, the path lookup takes
+ * us to the top layer. But we may need to descend
+ * to a lower layer. For non-union mounts the layer
+ * is always zero.
+ */
+ for (i = 0; i < (int) state->ss_union_layer; i++) {
+ if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
+ break;
+ tvp = vp;
+ vp = vp->v_mount->mnt_vnodecovered;
+ if (vp == NULL) {
+ vp = tvp;
+ error = ENOENT;
+ goto freeandexit;
+ }
+ vnode_getwithref(vp);
+ vnode_put(tvp);
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
+ if (error) {
+ vnode_put(vp);
+ goto freeandexit;
+ }
+#endif
+
+
+ /*
+ * If searchblock.maxmatches == 0, then skip the search. This has happened
+ * before and sometimes the underlying code doesn't deal with it well.
+ */
+ if (searchblock.maxmatches == 0) {
+ nummatches = 0;
+ goto saveandexit;
+ }
+
+ /*
+ * All right, we have everything we need, so let's make the call.
+ *
+ * We keep special track of the return value from the file system:
+ * EAGAIN is an acceptable error condition that shouldn't keep us
+ * from copying out any results...
+ */
+
+ fserror = VNOP_SEARCHFS(vp,
+ searchparams1,
+ searchparams2,
+ &searchblock.searchattrs,
+ (u_long)searchblock.maxmatches,
+ &timelimit,
+ returnattrs,
+ &nummatches,
+ (u_long)uap->scriptcode,
+ (u_long)uap->options,
+ auio,
+ (struct searchstate *) &state->ss_fsstate,
+ ctx);
+
+ /*
+ * If it's a union mount we need to be called again
+ * to search the mounted-on filesystem.
+ */
+ if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
+ state->ss_union_flags = SRCHFS_START;
+ state->ss_union_layer++; // search next layer down
+ fserror = EAGAIN;
+ }
+
+saveandexit:
+
+ vnode_put(vp);