X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0b4e3aa066abc0728aacb4bbeb86f53f9737156e..593a1d5fd87cdf5b46dd5fcb84467b432cea0f91:/bsd/vfs/vfs_syscalls.c diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index b5b5e1248..be9bfe17a 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 1995-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1995-2008 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1989, 1993 @@ -58,40 +64,142 @@ * * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95 */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. 
+ */ #include #include #include #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include +#include #include +#include + +#include +#include + +#include +#include +#include + +#include + +#include + +#if CONFIG_MACF +#include +#include +#endif + +#if CONFIG_FSE +#define GET_PATH(x) \ + (x) = get_pathbuff(); +#define RELEASE_PATH(x) \ + release_pathbuff(x); +#else +#define GET_PATH(x) \ + MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK); +#define RELEASE_PATH(x) \ + FREE_ZONE((x), MAXPATHLEN, M_NAMEI); +#endif /* CONFIG_FSE */ + +/* struct for checkdirs iteration */ +struct cdirargs { + vnode_t olddp; + vnode_t newdp; +}; +/* callback for checkdirs iteration */ +static int checkdirs_callback(proc_t p, void * arg); + +static int change_dir(struct nameidata *ndp, vfs_context_t ctx); +static int checkdirs(vnode_t olddp, vfs_context_t ctx); +void enablequotas(struct mount *mp, vfs_context_t ctx); +static int getfsstat_callback(mount_t mp, void * arg); +static int getutimes(user_addr_t usrtvp, struct timespec *tsp); +static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag); +static int sync_callback(mount_t, void *); +static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp, int *sizep, boolean_t is_64_bit, + boolean_t partial_copy); +static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp); +int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t); + +__private_extern__ +int sync_internal(void); + +__private_extern__ +int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *); + +__private_extern__ +int unlink1(vfs_context_t, struct nameidata *, int); + + +#ifdef __APPLE_API_OBSOLETE +struct fstatv_args { + int fd; /* 
file descriptor of the target file */ + struct vstat *vsb; /* vstat structure for returned info */ +}; +struct lstatv_args { + const char *path; /* pathname of the target file */ + struct vstat *vsb; /* vstat structure for returned info */ +}; +struct mkcomplex_args { + const char *path; /* pathname of the file to be created */ + mode_t mode; /* access mode for the newly created file */ + u_long type; /* format of the complex file */ +}; +struct statv_args { + const char *path; /* pathname of the target file */ + struct vstat *vsb; /* vstat structure for returned info */ +}; + +int fstatv(proc_t p, struct fstatv_args *uap, register_t *retval); +int lstatv(proc_t p, struct lstatv_args *uap, register_t *retval); +int mkcomplex(proc_t p, struct mkcomplex_args *uap, register_t *retval); +int statv(proc_t p, struct statv_args *uap, register_t *retval); -struct lock__bsd__ exchangelock; +#endif /* __APPLE_API_OBSOLETE */ /* - * The currently logged-in user, for ownership of files/directories whose on-disk - * permissions are ignored: + * incremented each time a mount or unmount operation occurs + * used to invalidate the cached value of the rootvp in the + * mount structure utilized by cache_lookup_path */ -uid_t console_user; - -static int change_dir __P((struct nameidata *ndp, struct proc *p)); -static void checkdirs __P((struct vnode *olddp)); +int mount_generation = 0; /* counts number of mount and unmount operations */ unsigned int vfs_nummntops=0; +extern struct fileops vnops; +extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); + + /* * Virtual File System System Calls */ @@ -99,182 +207,223 @@ unsigned int vfs_nummntops=0; /* * Mount a file system. 
*/ -struct mount_args { - char *type; - char *path; - int flags; - caddr_t data; -}; /* ARGSUSED */ int -mount(p, uap, retval) - struct proc *p; - register struct mount_args *uap; - register_t *retval; +mount(proc_t p, struct mount_args *uap, __unused register_t *retval) +{ + struct __mac_mount_args muap; + + muap.type = uap->type; + muap.path = uap->path; + muap.flags = uap->flags; + muap.data = uap->data; + muap.mac_p = USER_ADDR_NULL; + return (__mac_mount(p, &muap, retval)); +} + +int +__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused register_t *retval) { struct vnode *vp; + struct vnode *devvp = NULLVP; + struct vnode *device_vnode = NULLVP; +#if CONFIG_MACF + struct vnode *rvp; +#endif struct mount *mp; - struct vfsconf *vfsp; - int error, flag; - struct vattr va; - u_long fstypenum; + struct vfstable *vfsp = (struct vfstable *)0; + int error, flag = 0; + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); struct nameidata nd; + struct nameidata nd1; char fstypename[MFSNAMELEN]; size_t dummy=0; + user_addr_t devpath = USER_ADDR_NULL; + user_addr_t fsmountargs = uap->data; + int ronly = 0; + int mntalloc = 0; + mode_t accessmode; + boolean_t is_64bit; + boolean_t is_rwlock_locked = FALSE; + + AUDIT_ARG(fflags, uap->flags); + + is_64bit = proc_is64bit(p); + /* * Get vnode to be covered */ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) + NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); vp = nd.ni_vp; if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) uap->flags |= MNT_UPDATE; + + error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy); + if (error) + goto out1; if (uap->flags & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { - vput(vp); - return (EINVAL); + error = EINVAL; + goto out1; } mp = vp->v_mount; - flag = mp->mnt_flag; + + /* unmount in progress return 
error */ + mount_lock(mp); + if (mp->mnt_lflag & MNT_LUNMOUNT) { + mount_unlock(mp); + error = EBUSY; + goto out1; + } + mount_unlock(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); + is_rwlock_locked = TRUE; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ if ((uap->flags & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { - vput(vp); - return (EOPNOTSUPP); /* Needs translation */ + error = ENOTSUP; + goto out1; } - mp->mnt_flag |= - uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ - if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && - (error = suser(p->p_ucred, &p->p_acflag))) { - vput(vp); - return (error); + if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) && + (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) { + goto out1; } +#if CONFIG_MACF + error = mac_mount_check_remount(ctx, mp); + if (error != 0) { + lck_rw_done(&mp->mnt_rwlock); + goto out1; + } +#endif /* - * Do not allow NFS export by non-root users. FOr non-root - * users, silently enforce MNT_NOSUID and MNT_NODEV, and - * MNT_NOEXEC if mount point is already MNT_NOEXEC. + * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, + * and MNT_NOEXEC if mount point is already MNT_NOEXEC. */ - if (p->p_ucred->cr_uid != 0) { - if (uap->flags & MNT_EXPORTED) { - vput(vp); - return (EPERM); - } + if (suser(vfs_context_ucred(ctx), NULL)) { uap->flags |= MNT_NOSUID | MNT_NODEV; - if (flag & MNT_NOEXEC) + if (mp->mnt_flag & MNT_NOEXEC) uap->flags |= MNT_NOEXEC; } - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - vput(vp); - return (EBUSY); - } - VOP_UNLOCK(vp, 0, p); + flag = mp->mnt_flag; + + mp->mnt_flag |= + uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + + vfsp = mp->mnt_vtable; goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. 
*/ - if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || - (va.va_uid != p->p_ucred->cr_uid && - (error = suser(p->p_ucred, &p->p_acflag)))) { - vput(vp); - return (error); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + if ((error = vnode_getattr(vp, &va, ctx)) || + (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) && + (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) { + goto out1; } /* - * Do not allow NFS export by non-root users. FOr non-root - * users, silently enforce MNT_NOSUID and MNT_NODEV, and + * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and * MNT_NOEXEC if mount point is already MNT_NOEXEC. */ - if (p->p_ucred->cr_uid != 0) { - if (uap->flags & MNT_EXPORTED) { - vput(vp); - return (EPERM); - } + if (suser(vfs_context_ucred(ctx), NULL)) { uap->flags |= MNT_NOSUID | MNT_NODEV; if (vp->v_mount->mnt_flag & MNT_NOEXEC) uap->flags |= MNT_NOEXEC; } - if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) { - vput(vp); - return (error); - } + if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) ) + goto out1; + + if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) ) + goto out1; + if (vp->v_type != VDIR) { - vput(vp); - return (ENOTDIR); - } -#if COMPAT_43 - /* - * Historically filesystem types were identified by number. If we - * get an integer for the filesystem type instead of a string, we - * check to see if it matches one of the historic filesystem types. - */ - fstypenum = (u_long)uap->type; - if (fstypenum < maxvfsconf) { - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (vfsp->vfc_typenum == fstypenum) - break; - if (vfsp == NULL) { - vput(vp); - return (ENODEV); - } - strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); - } else -#endif /* COMPAT_43 */ - if (error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy)) { - vput(vp); - return (error); + error = ENOTDIR; + goto out1; } + + /* XXXAUDIT: Should we capture the type on the error path as well? 
*/ + AUDIT_ARG(text, fstypename); + mount_list_lock(); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (!strcmp(vfsp->vfc_name, fstypename)) + if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) break; + mount_list_unlock(); if (vfsp == NULL) { - vput(vp); - return (ENODEV); + error = ENODEV; + goto out1; } - simple_lock(&vp->v_interlock); +#if CONFIG_MACF + error = mac_mount_check_mount(ctx, vp, + &nd.ni_cnd, vfsp->vfc_name); + if (error != 0) + goto out1; +#endif if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) { - simple_unlock(&vp->v_interlock); - vput(vp); - return (EBUSY); + error = EBUSY; + goto out1; } + vnode_lock_spin(vp); SET(vp->v_flag, VMOUNT); - simple_unlock(&vp->v_interlock); + vnode_unlock(vp); /* * Allocate and initialize the filesystem. */ - mp = (struct mount *)_MALLOC_ZONE((u_long)sizeof(struct mount), + MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); + mntalloc = 1; /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; - - lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - (void)vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt; + mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; + mp->mnt_devblocksize = DEV_BSIZE; + mp->mnt_alignmentmask = PAGE_MASK; + mp->mnt_ioflags = 0; + mp->mnt_realrootvp = NULLVP; + mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; + + TAILQ_INIT(&mp->mnt_vnodelist); + TAILQ_INIT(&mp->mnt_workerqueue); + TAILQ_INIT(&mp->mnt_newvnodes); + mount_lock_init(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); + is_rwlock_locked = TRUE; mp->mnt_op = vfsp->vfc_vfsops; - mp->mnt_vfc = vfsp; + mp->mnt_vtable = vfsp; + mount_list_lock(); vfsp->vfc_refcount++; - mp->mnt_stat.f_type = vfsp->vfc_typenum; + mount_list_unlock(); + //mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; - 
strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); + strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN); mp->mnt_vnodecovered = vp; - mp->mnt_stat.f_owner = p->p_ucred->cr_uid; - VOP_UNLOCK(vp, 0, p); + mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx)); + mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1; + /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */ + vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE); + update: /* * Set the mount level flags. @@ -285,16 +434,175 @@ update: mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | - MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED); + MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED | + MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE); mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | - MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED); + MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED | + MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE); + +#if CONFIG_MACF + if (uap->flags & MNT_MULTILABEL) { + if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) { + error = EINVAL; + goto out1; + } + mp->mnt_flag |= MNT_MULTILABEL; + } +#endif + + if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) { + if (is_64bit) { + if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) ) + goto out1; + fsmountargs += sizeof(devpath); + } else { + char *tmp; + if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) ) + goto out1; + /* munge into LP64 addr */ + devpath = CAST_USER_ADDR_T(tmp); + fsmountargs += sizeof(tmp); + } + + /* if it is not update and device name needs to be parsed */ + if ((devpath)) { + NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx); + if ( (error = namei(&nd1)) ) + goto out1; + + 
strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN); + devvp = nd1.ni_vp; + + nameidone(&nd1); + + if (devvp->v_type != VBLK) { + error = ENOTBLK; + goto out2; + } + if (major(devvp->v_rdev) >= nblkdev) { + error = ENXIO; + goto out2; + } + /* + * If mount by non-root, then verify that user has necessary + * permissions on the device. + */ + if (suser(vfs_context_ucred(ctx), NULL) != 0) { + accessmode = KAUTH_VNODE_READ_DATA; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + accessmode |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0) + goto out2; + } + } + if (devpath && ((uap->flags & MNT_UPDATE) == 0)) { + if ( (error = vnode_ref(devvp)) ) + goto out2; + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if ( (error = vfs_mountedon(devvp)) ) + goto out3; + + if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) { + error = EBUSY; + goto out3; + } + if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) { + error = ENOTBLK; + goto out3; + } + if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) ) + goto out3; + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; +#if CONFIG_MACF + error = mac_vnode_check_open(ctx, + devvp, + ronly ? FREAD : FREAD|FWRITE); + if (error) + goto out3; +#endif /* MAC */ + if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) ) + goto out3; + + mp->mnt_devvp = devvp; + device_vnode = devvp; + } else { + if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { + /* + * If upgrade to read-write by non-root, then verify + * that user has necessary permissions on the device. 
+ */ + device_vnode = mp->mnt_devvp; + if (device_vnode && suser(vfs_context_ucred(ctx), NULL)) { + if ((error = vnode_authorize(device_vnode, NULL, + KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) + goto out2; + } + } + device_vnode = NULLVP; + } + } +#if CONFIG_MACF + if ((uap->flags & MNT_UPDATE) == 0) { + mac_mount_label_init(mp); + mac_mount_label_associate(ctx, mp); + } + if (uap->mac_p != USER_ADDR_NULL) { + struct user_mac mac; + char *labelstr = NULL; + size_t ulen = 0; + + if ((uap->flags & MNT_UPDATE) != 0) { + error = mac_mount_check_label_update( + ctx, mp); + if (error != 0) + goto out3; + } + if (is_64bit) { + error = copyin(uap->mac_p, &mac, sizeof(mac)); + } else { + struct mac mac32; + error = copyin(uap->mac_p, &mac32, sizeof(mac32)); + mac.m_buflen = mac32.m_buflen; + mac.m_string = CAST_USER_ADDR_T(mac32.m_string); + } + if (error != 0) + goto out3; + if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) || + (mac.m_buflen < 2)) { + error = EINVAL; + goto out3; + } + MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK); + error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen); + if (error != 0) { + FREE(labelstr, M_MACTEMP); + goto out3; + } + AUDIT_ARG(mac_string, labelstr); + error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr); + FREE(labelstr, M_MACTEMP); + if (error != 0) + goto out3; + } +#endif + if (device_vnode != NULL) { + VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL); + mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV; + } + /* * Mount the filesystem. 
*/ - error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p); - if (mp->mnt_flag & MNT_UPDATE) { - vrele(vp); + error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx); + + if (uap->flags & MNT_UPDATE) { if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ @@ -302,78 +610,316 @@ update: mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) mp->mnt_flag = flag; - vfs_unbusy(mp, p); - return (error); + vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL); + lck_rw_done(&mp->mnt_rwlock); + is_rwlock_locked = FALSE; + if (!error) + enablequotas(mp, ctx); + goto out2; } /* * Put the new filesystem on the mount list after root. */ - cache_purge(vp); - if (!error) { - simple_lock(&vp->v_interlock); + if (error == 0) { + struct vfs_attr vfsattr; +#if CONFIG_MACF + if (vfs_flags(mp) & MNT_MULTILABEL) { + error = VFS_ROOT(mp, &rvp, ctx); + if (error) { + printf("%s() VFS_ROOT returned %d\n", __func__, error); + goto out3; + } + + /* VFS_ROOT provides reference so needref = 0 */ + error = vnode_label(mp, NULL, rvp, NULL, 0, ctx); + if (error) + goto out3; + } +#endif /* MAC */ + + vnode_lock_spin(vp); CLR(vp->v_flag, VMOUNT); - vp->v_mountedhere =mp; - simple_unlock(&vp->v_interlock); - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - checkdirs(vp); - VOP_UNLOCK(vp, 0, p); - vfs_unbusy(mp, p); - if (error = VFS_START(mp, 0, p)) - vrele(vp); + vp->v_mountedhere = mp; + vnode_unlock(vp); + + /* + * taking the name_cache_lock exclusively will + * insure that everyone is out of the fast path who + * might be trying to use a now stale copy of + * vp->v_mountedhere->mnt_realrootvp + * bumping mount_generation causes the cached values + * to be invalidated + */ + name_cache_lock(); + mount_generation++; + name_cache_unlock(); + + vnode_ref(vp); + error = checkdirs(vp, ctx); + if (error != 0) { + /* Unmount the filesystem as cdir/rdirs cannot be updated */ + goto out4; + } + /* + * there is no 
cleanup code here so I have made it void + * we need to revisit this + */ + (void)VFS_START(mp, 0, ctx); + + mount_list_add(mp); + lck_rw_done(&mp->mnt_rwlock); + is_rwlock_locked = FALSE; + + /* Check if this mounted file system supports EAs or named streams. */ + /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */ + VFSATTR_INIT(&vfsattr); + VFSATTR_WANTED(&vfsattr, f_capabilities); + if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 && + vfs_getattr(mp, &vfsattr, ctx) == 0 && + VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) { + if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && + (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) { + mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS; + } +#if NAMEDSTREAMS + if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) && + (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) { + mp->mnt_kern_flag |= MNTK_NAMED_STREAMS; + } +#endif + /* Check if this file system supports path from id lookups. */ + if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) && + (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) { + mp->mnt_kern_flag |= MNTK_PATH_FROM_ID; + } else if (mp->mnt_flag & MNT_DOVOLFS) { + /* Legacy MNT_DOVOLFS flag also implies path from id lookups. 
*/ + mp->mnt_kern_flag |= MNTK_PATH_FROM_ID; + } + } + if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) { + mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS; + } + if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) { + mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT; + } /* increment the operations count */ - if (!error) - vfs_nummntops++; + OSAddAtomic(1, (SInt32 *)&vfs_nummntops); + enablequotas(mp, ctx); + + if (device_vnode) { + device_vnode->v_specflags |= SI_MOUNTEDON; + + /* + * cache the IO attributes for the underlying physical media... + * an error return indicates the underlying driver doesn't + * support all the queries necessary... however, reasonable + * defaults will have been set, so no reason to bail or care + */ + vfs_init_io_attributes(device_vnode, mp); + } + + /* Now that mount is setup, notify the listeners */ + vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL); } else { - simple_lock(&vp->v_interlock); + vnode_lock_spin(vp); CLR(vp->v_flag, VMOUNT); - simple_unlock(&vp->v_interlock); - mp->mnt_vfc->vfc_refcount--; - vfs_unbusy(mp, p); - _FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); - vput(vp); + vnode_unlock(vp); + mount_list_lock(); + mp->mnt_vtable->vfc_refcount--; + mount_list_unlock(); + + if (device_vnode ) { + VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx); + vnode_rele(device_vnode); + } + lck_rw_done(&mp->mnt_rwlock); + is_rwlock_locked = FALSE; + mount_lock_destroy(mp); +#if CONFIG_MACF + mac_mount_label_destroy(mp); +#endif + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); } - return (error); + nameidone(&nd); + + /* + * drop I/O count on covered 'vp' and + * on the device vp if there was one + */ + if (devpath && devvp) + vnode_put(devvp); + vnode_put(vp); + + return(error); +out4: + (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx); + if (device_vnode != NULLVP) { + VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? 
FREAD : FREAD|FWRITE, + ctx); + + } + vnode_lock_spin(vp); + vp->v_mountedhere = (mount_t) 0; + vnode_unlock(vp); + vnode_rele(vp); +out3: + if (devpath && ((uap->flags & MNT_UPDATE) == 0)) + vnode_rele(devvp); +out2: + if (devpath && devvp) + vnode_put(devvp); +out1: + /* Release mnt_rwlock only when it was taken */ + if (is_rwlock_locked == TRUE) { + lck_rw_done(&mp->mnt_rwlock); + } + if (mntalloc) { +#if CONFIG_MACF + mac_mount_label_destroy(mp); +#endif + mount_list_lock(); + vfsp->vfc_refcount--; + mount_list_unlock(); + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + } + vnode_put(vp); + nameidone(&nd); + + return(error); +} + +void +enablequotas(struct mount *mp, vfs_context_t ctx) +{ + struct nameidata qnd; + int type; + char qfpath[MAXPATHLEN]; + const char *qfname = QUOTAFILENAME; + const char *qfopsname = QUOTAOPSNAME; + const char *qfextension[] = INITQFNAMES; + + /* XXX Should be an MNTK_ flag, instead of strncmp()'s */ + if ((strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) + && (strncmp( mp->mnt_vfsstat.f_fstypename, "ufs", sizeof("ufs")) != 0)) + return; + + /* + * Enable filesystem disk quotas if necessary. 
+ * We ignore errors as this should not interfere with final mount + */ + for (type=0; type < MAXQUOTAS; type++) { + snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]); + NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), ctx); + if (namei(&qnd) != 0) + continue; /* option file to trigger quotas is not present */ + vnode_put(qnd.ni_vp); + nameidone(&qnd); + snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]); + + (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx); + } + return; +} + + +static int +checkdirs_callback(proc_t p, void * arg) +{ + struct cdirargs * cdrp = (struct cdirargs * )arg; + vnode_t olddp = cdrp->olddp; + vnode_t newdp = cdrp->newdp; + struct filedesc *fdp; + vnode_t tvp; + vnode_t fdp_cvp; + vnode_t fdp_rvp; + int cdir_changed = 0; + int rdir_changed = 0; + + /* + * XXX Also needs to iterate each thread in the process to see if it + * XXX is using a per-thread current working directory, and, if so, + * XXX update that as well. + */ + + proc_fdlock(p); + fdp = p->p_fd; + if (fdp == (struct filedesc *)0) { + proc_fdunlock(p); + return(PROC_RETURNED); + } + fdp_cvp = fdp->fd_cdir; + fdp_rvp = fdp->fd_rdir; + proc_fdunlock(p); + + if (fdp_cvp == olddp) { + vnode_ref(newdp); + tvp = fdp->fd_cdir; + fdp_cvp = newdp; + cdir_changed = 1; + vnode_rele(tvp); + } + if (fdp_rvp == olddp) { + vnode_ref(newdp); + tvp = fdp->fd_rdir; + fdp_rvp = newdp; + rdir_changed = 1; + vnode_rele(tvp); + } + if (cdir_changed || rdir_changed) { + proc_fdlock(p); + fdp->fd_cdir = fdp_cvp; + fdp->fd_rdir = fdp_rvp; + proc_fdunlock(p); + } + return(PROC_RETURNED); } + + /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. 
*/ -static void -checkdirs(olddp) - struct vnode *olddp; +static int +checkdirs(vnode_t olddp, vfs_context_t ctx) { - struct filedesc *fdp; - struct vnode *newdp; - struct proc *p; + vnode_t newdp; + vnode_t tvp; + int err; + struct cdirargs cdr; + struct uthread * uth = get_bsdthread_info(current_thread()); if (olddp->v_usecount == 1) - return; - if (VFS_ROOT(olddp->v_mountedhere, &newdp)) - panic("mount: lost mount"); - for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { - fdp = p->p_fd; - if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); - VREF(newdp); - fdp->fd_cdir = newdp; - } - if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); - VREF(newdp); - fdp->fd_rdir = newdp; - } + return(0); + if (uth != (struct uthread *)0) + uth->uu_notrigger = 1; + err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx); + if (uth != (struct uthread *)0) + uth->uu_notrigger = 0; + + if (err != 0) { +#if DIAGNOSTIC + panic("mount: lost mount: error %d", err); +#endif + return(err); } + + cdr.olddp = olddp; + cdr.newdp = newdp; + /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */ + proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL); + if (rootvnode == olddp) { - vrele(rootvnode); - VREF(newdp); + vnode_ref(newdp); + tvp = rootvnode; rootvnode = newdp; + vnode_rele(tvp); } - vput(newdp); + + vnode_put(newdp); + return(0); } /* @@ -382,111 +928,308 @@ checkdirs(olddp) * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). 
*/ -struct unmount_args { - char *path; - int flags; -}; /* ARGSUSED */ int -unmount(p, uap, retval) - struct proc *p; - register struct unmount_args *uap; - register_t *retval; +unmount(__unused proc_t p, struct unmount_args *uap, __unused register_t *retval) { - register struct vnode *vp; + vnode_t vp; struct mount *mp; int error; struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) + NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); vp = nd.ni_vp; mp = vp->v_mount; + nameidone(&nd); - /* - * Only root, or the user that did the original mount is - * permitted to unmount this filesystem. - */ - if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && - (error = suser(p->p_ucred, &p->p_acflag))) { - vput(vp); +#if CONFIG_MACF + error = mac_mount_check_umount(ctx, mp); + if (error != 0) { + vnode_put(vp); return (error); } - - /* - * Don't allow unmounting the root file system. - */ - if (mp->mnt_flag & MNT_ROOTFS) { - vput(vp); - return (EBUSY); /* the root is always busy */ - } - +#endif /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { - vput(vp); + vnode_put(vp); return (EINVAL); } - vput(vp); - return (dounmount(mp, uap->flags, p)); + mount_ref(mp, 0); + vnode_put(vp); + /* safedounmount consumes the mount ref */ + return (safedounmount(mp, uap->flags, ctx)); +} + +int +vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx) +{ + mount_t mp; + + mp = mount_list_lookupby_fsid(fsid, 0, 1); + if (mp == (mount_t)0) { + return(ENOENT); + } + mount_ref(mp, 0); + mount_iterdrop(mp); + /* safedounmount consumes the mount ref */ + return(safedounmount(mp, flags, ctx)); } + /* - * Do the actual file system unmount. + * The mount struct comes with a mount ref which will be consumed. 
+ * Do the actual file system unmount, prevent some common foot shooting. */ int -dounmount(mp, flags, p) - register struct mount *mp; - int flags; - struct proc *p; +safedounmount(struct mount *mp, int flags, vfs_context_t ctx) { - struct vnode *coveredvp; int error; + proc_t p = vfs_context_proc(ctx); - simple_lock(&mountlist_slock); - mp->mnt_kern_flag |= MNTK_UNMOUNT; - lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); - mp->mnt_flag &=~ MNT_ASYNC; - ubc_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - if (((mp->mnt_flag & MNT_RDONLY) || - (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || - (flags & MNT_FORCE)) - error = VFS_UNMOUNT(mp, flags, p); - simple_lock(&mountlist_slock); - if (error) { - mp->mnt_kern_flag &= ~MNTK_UNMOUNT; - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, - &mountlist_slock, p); + /* + * Only root, or the user that did the original mount is + * permitted to unmount this filesystem. + */ + if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) && + (error = suser(kauth_cred_get(), &p->p_acflag))) goto out; - } - /* increment the operations count */ + /* + * Don't allow unmounting the root file system. + */ + if (mp->mnt_flag & MNT_ROOTFS) { + error = EBUSY; /* the root is always busy */ + goto out; + } + + return (dounmount(mp, flags, 1, ctx)); + +out: + mount_drop(mp, 0); + return(error); +} + +/* + * Do the actual file system unmount. 
+ */ +int +dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) +{ + vnode_t coveredvp = (vnode_t)0; + int error; + int needwakeup = 0; + int forcedunmount = 0; + int lflags = 0; + struct vnode *devvp = NULLVP; + + if (flags & MNT_FORCE) + forcedunmount = 1; + mount_lock(mp); + /* XXX post jaguar fix LK_DRAIN - then clean this up */ + if ((flags & MNT_FORCE)) { + mp->mnt_kern_flag |= MNTK_FRCUNMOUNT; + mp->mnt_lflag |= MNT_LFORCE; + } + if (mp->mnt_lflag & MNT_LUNMOUNT) { + mp->mnt_lflag |= MNT_LWAIT; + if(withref != 0) + mount_drop(mp, 1); + msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL); + /* + * The prior unmount attempt has probably succeeded. + * Do not dereference mp here - returning EBUSY is safest. + */ + return (EBUSY); + } + mp->mnt_kern_flag |= MNTK_UNMOUNT; + mp->mnt_lflag |= MNT_LUNMOUNT; + mp->mnt_flag &=~ MNT_ASYNC; + /* + * anyone currently in the fast path that + * trips over the cached rootvp will be + * dumped out and forced into the slow path + * to regenerate a new cached value + */ + mp->mnt_realrootvp = NULLVP; + mount_unlock(mp); + + /* + * taking the name_cache_lock exclusively will + * insure that everyone is out of the fast path who + * might be trying to use a now stale copy of + * vp->v_mountedhere->mnt_realrootvp + * bumping mount_generation causes the cached values + * to be invalidated + */ + name_cache_lock(); + mount_generation++; + name_cache_unlock(); + + + lck_rw_lock_exclusive(&mp->mnt_rwlock); + if (withref != 0) + mount_drop(mp, 0); +#if CONFIG_FSE + fsevent_unmount(mp); /* has to come first! 
*/ +#endif + error = 0; + if (forcedunmount == 0) { + ubc_umount(mp); /* release cached vnodes */ + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + error = VFS_SYNC(mp, MNT_WAIT, ctx); + if (error) { + mount_lock(mp); + mp->mnt_kern_flag &= ~MNTK_UNMOUNT; + mp->mnt_lflag &= ~MNT_LUNMOUNT; + mp->mnt_lflag &= ~MNT_LFORCE; + goto out; + } + } + } + + if (forcedunmount) + lflags |= FORCECLOSE; + error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags); + if ((forcedunmount == 0) && error) { + mount_lock(mp); + mp->mnt_kern_flag &= ~MNTK_UNMOUNT; + mp->mnt_lflag &= ~MNT_LUNMOUNT; + mp->mnt_lflag &= ~MNT_LFORCE; + goto out; + } + + /* make sure there are no one in the mount iterations or lookup */ + mount_iterdrain(mp); + + error = VFS_UNMOUNT(mp, flags, ctx); + if (error) { + mount_iterreset(mp); + mount_lock(mp); + mp->mnt_kern_flag &= ~MNTK_UNMOUNT; + mp->mnt_lflag &= ~MNT_LUNMOUNT; + mp->mnt_lflag &= ~MNT_LFORCE; + goto out; + } + + /* increment the operations count */ if (!error) - vfs_nummntops++; - CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + OSAddAtomic(1, (SInt32 *)&vfs_nummntops); + + if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) { + /* hold an io reference and drop the usecount before close */ + devvp = mp->mnt_devvp; + vnode_clearmountedon(devvp); + vnode_getalways(devvp); + vnode_rele(devvp); + VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? 
FREAD : FREAD|FWRITE, + ctx); + vnode_put(devvp); + } + lck_rw_done(&mp->mnt_rwlock); + mount_list_remove(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); + + /* mark the mount point hook in the vp but not drop the ref yet */ if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { - coveredvp->v_mountedhere = (struct mount *)0; - simple_unlock(&mountlist_slock); - vrele(coveredvp); - simple_lock(&mountlist_slock); + vnode_getwithref(coveredvp); + vnode_lock_spin(coveredvp); + coveredvp->v_mountedhere = (struct mount *)0; + vnode_unlock(coveredvp); + vnode_put(coveredvp); } - mp->mnt_vfc->vfc_refcount--; - if (mp->mnt_vnodelist.lh_first != NULL) { - panic("unmount: dangling vnode"); + + mount_list_lock(); + mp->mnt_vtable->vfc_refcount--; + mount_list_unlock(); + + cache_purgevfs(mp); /* remove cache entries for this file sys */ + vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL); + mount_lock(mp); + mp->mnt_lflag |= MNT_LDEAD; + + if (mp->mnt_lflag & MNT_LWAIT) { + /* + * do the wakeup here + * in case we block in mount_refdrain + * which will drop the mount lock + * and allow anyone blocked in vfs_busy + * to wakeup and see the LDEAD state + */ + mp->mnt_lflag &= ~MNT_LWAIT; + wakeup((caddr_t)mp); } - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + mount_refdrain(mp); out: - if (mp->mnt_kern_flag & MNTK_MWAIT) + if (mp->mnt_lflag & MNT_LWAIT) { + mp->mnt_lflag &= ~MNT_LWAIT; + needwakeup = 1; + } + mount_unlock(mp); + lck_rw_done(&mp->mnt_rwlock); + + if (needwakeup) wakeup((caddr_t)mp); - if (!error) - _FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + if (!error) { + if ((coveredvp != NULLVP)) { + vnode_getwithref(coveredvp); + vnode_rele(coveredvp); + vnode_lock_spin(coveredvp); + if(mp->mnt_crossref == 0) { + vnode_unlock(coveredvp); + mount_lock_destroy(mp); +#if CONFIG_MACF + mac_mount_label_destroy(mp); +#endif + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + } else { + coveredvp->v_lflag |= VL_MOUNTDEAD; + 
vnode_unlock(coveredvp); + } + vnode_put(coveredvp); + } else if (mp->mnt_flag & MNT_ROOTFS) { + mount_lock_destroy(mp); +#if CONFIG_MACF + mac_mount_label_destroy(mp); +#endif + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + } else + panic("dounmount: no coveredvp"); + } return (error); } +void +mount_dropcrossref(mount_t mp, vnode_t dp, int need_put) +{ + vnode_lock(dp); + mp->mnt_crossref--; + if (mp->mnt_crossref < 0) + panic("mount cross refs -ve"); + if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) { + dp->v_lflag &= ~VL_MOUNTDEAD; + if (need_put) + vnode_put_locked(dp); + vnode_unlock(dp); + mount_lock_destroy(mp); +#if CONFIG_MACF + mac_mount_label_destroy(mp); +#endif + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + return; + } + if (need_put) + vnode_put_locked(dp); + vnode_unlock(dp); +} + + /* * Sync each mounted filesystem. */ @@ -495,44 +1238,34 @@ int syncprt = 0; struct ctldebug debug0 = { "syncprt", &syncprt }; #endif -struct sync_args { - int dummy; -}; int print_vmpage_stat=0; -/* ARGSUSED */ -int -sync(p, uap, retval) - struct proc *p; - struct sync_args *uap; - register_t *retval; +static int +sync_callback(mount_t mp, __unused void * arg) { - register struct mount *mp, *nmp; int asyncflag; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; - VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); + VFS_SYNC(mp, MNT_NOWAIT, vfs_context_current()); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; - } - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); } - simple_unlock(&mountlist_slock); + return(VFS_RETURNED); +} + +extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean; +extern 
unsigned int dp_pgins, dp_pgouts; + +/* ARGSUSED */ +int +sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *retval) +{ + + vfs_iterate(LK_NOWAIT, sync_callback, (void *)0); { - extern void vm_countdirtypages(void); - extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean; - extern unsigned int dp_pgins, dp_pgouts; if(print_vmpage_stat) { vm_countdirtypages(); printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein, @@ -549,1675 +1282,3376 @@ sync(p, uap, retval) /* * Change filesystem quotas. */ -struct quotactl_args { - char *path; - int cmd; - int uid; - caddr_t arg; -}; -/* ARGSUSED */ +#if QUOTA +static int quotactl_funneled(proc_t p, struct quotactl_args *uap, register_t *retval); + int -quotactl(p, uap, retval) - struct proc *p; - register struct quotactl_args *uap; - register_t *retval; +quotactl(proc_t p, struct quotactl_args *uap, register_t *retval) { - register struct mount *mp; + boolean_t funnel_state; int error; - struct nameidata nd; - - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); - mp = nd.ni_vp->v_mount; - vrele(nd.ni_vp); - return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, - uap->arg, p)); + + funnel_state = thread_funnel_set(kernel_flock, TRUE); + error = quotactl_funneled(p, uap, retval); + thread_funnel_set(kernel_flock, funnel_state); + return(error); } -/* - * Get filesystem statistics. 
- */ -struct statfs_args { - char *path; - struct statfs *buf; -}; -/* ARGSUSED */ -int -statfs(p, uap, retval) - struct proc *p; - register struct statfs_args *uap; - register_t *retval; +static int +quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retval) { - register struct mount *mp; - register struct statfs *sp; - int error; + struct mount *mp; + int error, quota_cmd, quota_status; + caddr_t datap; + size_t fnamelen; struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + struct dqblk my_dqblk; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + AUDIT_ARG(uid, uap->uid, 0, 0, 0); + AUDIT_ARG(cmd, uap->cmd); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); mp = nd.ni_vp->v_mount; - sp = &mp->mnt_stat; - vrele(nd.ni_vp); - if (error = VFS_STATFS(mp, sp, p)) - return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, - sizeof(*sp)-sizeof(sp->f_reserved3)-sizeof(sp->f_reserved4))); + vnode_put(nd.ni_vp); + nameidone(&nd); + + /* copyin any data we will need for downstream code */ + quota_cmd = uap->cmd >> SUBCMDSHIFT; + + switch (quota_cmd) { + case Q_QUOTAON: + /* uap->arg specifies a file from which to take the quotas */ + fnamelen = MAXPATHLEN; + datap = kalloc(MAXPATHLEN); + error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen); + break; + case Q_GETQUOTA: + /* uap->arg is a pointer to a dqblk structure. */ + datap = (caddr_t) &my_dqblk; + break; + case Q_SETQUOTA: + case Q_SETUSE: + /* uap->arg is a pointer to a dqblk structure. 
*/ + datap = (caddr_t) &my_dqblk; + if (proc_is64bit(p)) { + struct user_dqblk my_dqblk64; + error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64)); + if (error == 0) { + munge_dqblk(&my_dqblk, &my_dqblk64, FALSE); + } + } + else { + error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk)); + } + break; + case Q_QUOTASTAT: + /* uap->arg is a pointer to an integer */ + datap = (caddr_t) &quota_status; + break; + default: + datap = NULL; + break; + } /* switch */ + + if (error == 0) { + error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx); + } + + switch (quota_cmd) { + case Q_QUOTAON: + if (datap != NULL) + kfree(datap, MAXPATHLEN); + break; + case Q_GETQUOTA: + /* uap->arg is a pointer to a dqblk structure we need to copy out to */ + if (error == 0) { + if (proc_is64bit(p)) { + struct user_dqblk my_dqblk64; + munge_dqblk(&my_dqblk, &my_dqblk64, TRUE); + error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64)); + } + else { + error = copyout(datap, uap->arg, sizeof (struct dqblk)); + } + } + break; + case Q_QUOTASTAT: + /* uap->arg is a pointer to an integer */ + if (error == 0) { + error = copyout(datap, uap->arg, sizeof(quota_status)); + } + break; + default: + break; + } /* switch */ + + return (error); } +#else +int +quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused register_t *retval) +{ + return (EOPNOTSUPP); +} +#endif /* QUOTA */ /* * Get filesystem statistics. + * + * Returns: 0 Success + * namei:??? + * vfs_update_vfsstat:??? 
+ * munge_statfs:EFAULT */ -struct fstatfs_args { - int fd; - struct statfs *buf; -}; /* ARGSUSED */ int -fstatfs(p, uap, retval) - struct proc *p; - register struct fstatfs_args *uap; - register_t *retval; +statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval) { - struct file *fp; struct mount *mp; - register struct statfs *sp; + struct vfsstatfs *sp; int error; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + vnode_t vp; - if (error = getvnode(p, uap->fd, &fp)) - return (error); - mp = ((struct vnode *)fp->f_data)->v_mount; - if (!mp) - return (EBADF); - sp = &mp->mnt_stat; - if (error = VFS_STATFS(mp, sp, p)) + NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, - sizeof(*sp)-sizeof(sp->f_reserved3)-sizeof(sp->f_reserved4))); -} - -/* - * Get statistics on all filesystems. - */ -struct getfsstat_args { - struct statfs *buf; - long bufsize; - int flags; -}; -int -getfsstat(p, uap, retval) - struct proc *p; - register struct getfsstat_args *uap; - register_t *retval; -{ - register struct mount *mp, *nmp; - register struct statfs *sp; - caddr_t sfsp; - long count, maxcount, error; + vp = nd.ni_vp; + mp = vp->v_mount; + sp = &mp->mnt_vfsstat; + nameidone(&nd); - maxcount = uap->bufsize / sizeof(struct statfs); - sfsp = (caddr_t)uap->buf; - count = 0; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - if (sfsp && count < maxcount) { - sp = &mp->mnt_stat; - /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. 
- */ - if (((uap->flags & MNT_NOWAIT) == 0 || - (uap->flags & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) { - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - continue; - } - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp))) - return (error); - sfsp += sizeof(*sp); - } - count++; - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); - if (sfsp && count > maxcount) - *retval = maxcount; - else - *retval = count; - return (0); -} + error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT); + vnode_put(vp); + if (error != 0) + return (error); -#if COMPAT_GETFSSTAT -ogetfsstat(p, uap, retval) - struct proc *p; - register struct getfsstat_args *uap; - register_t *retval; -{ - register struct mount *mp, *nmp; - register struct statfs *sp; - caddr_t sfsp; - long count, maxcount, error; - - maxcount = uap->bufsize / (sizeof(struct statfs) - sizeof(sp->f_reserved4)); - sfsp = (caddr_t)uap->buf; - count = 0; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - if (sfsp && count < maxcount) { - sp = &mp->mnt_stat; - /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. 
- */ - if (((uap->flags & MNT_NOWAIT) == 0 || - (uap->flags & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) { - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - continue; - } - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - error = copyout((caddr_t)sp, sfsp, - sizeof(*sp) - sizeof(sp->f_reserved3) - sizeof(sp->f_reserved4)); - if (error) - return (error); - sfsp += sizeof(*sp) - sizeof(sp->f_reserved4); - } - count++; - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); - if (sfsp && count > maxcount) - *retval = maxcount; - else - *retval = count; - return (0); + error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE); + return (error); } -#endif /* - * Change current working directory to a given file descriptor. + * Get filesystem statistics. */ -struct fchdir_args { - int fd; -}; /* ARGSUSED */ int -fchdir(p, uap, retval) - struct proc *p; - struct fchdir_args *uap; - register_t *retval; +fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused register_t *retval) { - register struct filedesc *fdp = p->p_fd; - struct vnode *vp, *tdp; + vnode_t vp; struct mount *mp; - struct file *fp; + struct vfsstatfs *sp; int error; - if (error = getvnode(p, uap->fd, &fp)) + AUDIT_ARG(fd, uap->fd); + + if ( (error = file_vnode(uap->fd, &vp)) ) return (error); - vp = (struct vnode *)fp->f_data; - VREF(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type != VDIR) - error = ENOTDIR; - else - error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - while (!error && (mp = vp->v_mountedhere) != NULL) { - if (vfs_busy(mp, 0, 0, p)) - continue; - error = VFS_ROOT(mp, &tdp); - vfs_unbusy(mp, p); - if (error) - break; - vput(vp); - vp = tdp; + + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + + mp = vp->v_mount; + if (!mp) { + file_drop(uap->fd); + return (EBADF); } - if (error) { - vput(vp); + sp = &mp->mnt_vfsstat; + if ((error = 
vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) { + file_drop(uap->fd); return (error); } - VOP_UNLOCK(vp, 0, p); - vrele(fdp->fd_cdir); - fdp->fd_cdir = vp; - return (0); + file_drop(uap->fd); + + error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE); + + return (error); } -/* - * Change current working directory (``.''). +/* + * Common routine to handle copying of statfs64 data to user space */ -struct chdir_args { - char *path; -}; -/* ARGSUSED */ -int -chdir(p, uap, retval) - struct proc *p; - struct chdir_args *uap; - register_t *retval; +static int +statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp) { - register struct filedesc *fdp = p->p_fd; int error; - struct nameidata nd; + struct statfs64 sfs; + + bzero(&sfs, sizeof(sfs)); + + sfs.f_bsize = sfsp->f_bsize; + sfs.f_iosize = (int32_t)sfsp->f_iosize; + sfs.f_blocks = sfsp->f_blocks; + sfs.f_bfree = sfsp->f_bfree; + sfs.f_bavail = sfsp->f_bavail; + sfs.f_files = sfsp->f_files; + sfs.f_ffree = sfsp->f_ffree; + sfs.f_fsid = sfsp->f_fsid; + sfs.f_owner = sfsp->f_owner; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_fssubtype = sfsp->f_fssubtype; + strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN); + strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN); + strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN); + + error = copyout((caddr_t)&sfs, bufp, sizeof(sfs)); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = change_dir(&nd, p)) - return (error); - vrele(fdp->fd_cdir); - fdp->fd_cdir = nd.ni_vp; - return (0); + return(error); } -/* - * Change notion of root (``/'') directory. 
+/* + * Get file system statistics in 64-bit mode */ -struct chroot_args { - char *path; -}; -/* ARGSUSED */ int -chroot(p, uap, retval) - struct proc *p; - struct chroot_args *uap; - register_t *retval; +statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t *retval) { - register struct filedesc *fdp = p->p_fd; + struct mount *mp; + struct vfsstatfs *sp; int error; struct nameidata nd; + vfs_context_t ctxp = vfs_context_current(); + vnode_t vp; - if (error = suser(p->p_ucred, &p->p_acflag)) + NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctxp); + error = namei(&nd); + if (error) return (error); + vp = nd.ni_vp; + mp = vp->v_mount; + sp = &mp->mnt_vfsstat; + nameidone(&nd); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = change_dir(&nd, p)) + error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT); + vnode_put(vp); + if (error != 0) return (error); - if(error = clone_system_shared_regions()) { - vrele(nd.ni_vp); - return (error); - } + error = statfs64_common(mp, sp, uap->buf); - if (fdp->fd_rdir != NULL) - vrele(fdp->fd_rdir); - fdp->fd_rdir = nd.ni_vp; - return (0); + return (error); } -/* - * Common routine for chroot and chdir. 
+/* + * Get file system statistics in 64-bit mode */ -static int -change_dir(ndp, p) - register struct nameidata *ndp; - struct proc *p; +int +fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused register_t *retval) { struct vnode *vp; + struct mount *mp; + struct vfsstatfs *sp; int error; - if (error = namei(ndp)) + AUDIT_ARG(fd, uap->fd); + + if ( (error = file_vnode(uap->fd, &vp)) ) return (error); - vp = ndp->ni_vp; - if (vp->v_type != VDIR) - error = ENOTDIR; - else - error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - if (error) - vput(vp); - else - VOP_UNLOCK(vp, 0, p); + + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + + mp = vp->v_mount; + if (!mp) { + file_drop(uap->fd); + return (EBADF); + } + sp = &mp->mnt_vfsstat; + if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) { + file_drop(uap->fd); + return (error); + } + file_drop(uap->fd); + + error = statfs64_common(mp, sp, uap->buf); + return (error); } -/* - * Check permissions, allocate an open file structure, - * and call the device open routine if any. 
- */ -struct open_args { - char *path; - int flags; - int mode; +struct getfsstat_struct { + user_addr_t sfsp; + user_addr_t *mp; + int count; + int maxcount; + int flags; + int error; }; -int -open(p, uap, retval) - struct proc *p; - register struct open_args *uap; - register_t *retval; -{ - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - register struct vnode *vp; - int flags, cmode, oflags; - struct file *nfp; - int type, indx, error; - struct flock lf; - struct nameidata nd; - extern struct fileops vnops; - oflags = uap->flags; - if ((oflags & O_ACCMODE) == O_ACCMODE) - return(EINVAL); - flags = FFLAGS(uap->flags); - if (error = falloc(p, &nfp, &indx)) - return (error); - fp = nfp; - cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - p->p_dupfd = -indx - 1; /* XXX check for fdopen */ - if (error = vn_open(&nd, flags, cmode)) { - ffree(fp); - if ((error == ENODEV || error == ENXIO) && - p->p_dupfd >= 0 && /* XXX from fdopen */ - (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { - *retval = indx; - return (0); + +static int +getfsstat_callback(mount_t mp, void * arg) +{ + + struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg; + struct vfsstatfs *sp; + int error, my_size; + vfs_context_t ctx = vfs_context_current(); + + if (fstp->sfsp && fstp->count < fstp->maxcount) { + sp = &mp->mnt_vfsstat; + /* + * If MNT_NOWAIT is specified, do not refresh the + * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. 
+ */ + if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) && + (error = vfs_update_vfsstat(mp, ctx, + VFS_USER_EVENT))) { + KAUTH_DEBUG("vfs_update_vfsstat returned %d", error); + return(VFS_RETURNED); } - if (error == ERESTART) - error = EINTR; - fdrelse(p, indx); - return (error); - } - p->p_dupfd = 0; - vp = nd.ni_vp; - fp->f_flag = flags & FMASK; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)vp; - if (flags & (O_EXLOCK | O_SHLOCK)) { - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - if (flags & O_EXLOCK) - lf.l_type = F_WRLCK; - else - lf.l_type = F_RDLCK; - type = F_FLOCK; - if ((flags & FNONBLOCK) == 0) - type |= F_WAIT; - VOP_UNLOCK(vp, 0, p); - if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdrelse(p, indx); - return (error); + + /* + * Need to handle LP64 version of struct statfs + */ + error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE); + if (error) { + fstp->error = error; + return(VFS_RETURNED_DONE); + } + fstp->sfsp += my_size; + + if (fstp->mp) { + error = mac_mount_label_get(mp, *fstp->mp); + if (error) { + fstp->error = error; + return(VFS_RETURNED_DONE); + } + fstp->mp++; } - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - fp->f_flag |= FHASLOCK; } - VOP_UNLOCK(vp, 0, p); - *fdflags(p, indx) &= ~UF_RESERVED; - *retval = indx; - return (0); + fstp->count++; + return(VFS_RETURNED); } -#if COMPAT_43 /* - * Create a file. + * Get statistics on all filesystems. 
*/ -struct ocreat_args { - char *path; - int mode; -}; int -ocreat(p, uap, retval) - struct proc *p; - register struct ocreat_args *uap; - register_t *retval; +getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval) { - struct open_args nuap; + struct __mac_getfsstat_args muap; - nuap.path = uap->path; - nuap.mode = uap->mode; - nuap.flags = O_WRONLY | O_CREAT | O_TRUNC; - return (open(p, &nuap, retval)); + muap.buf = uap->buf; + muap.bufsize = uap->bufsize; + muap.mac = USER_ADDR_NULL; + muap.macsize = 0; + muap.flags = uap->flags; + + return (__mac_getfsstat(p, &muap, retval)); } -#endif /* COMPAT_43 */ -/* - * Create a special file. - */ -struct mknod_args { - char *path; - int mode; - int dev; -}; -/* ARGSUSED */ int -mknod(p, uap, retval) - struct proc *p; - register struct mknod_args *uap; - register_t *retval; +__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval) { - register struct vnode *vp; - struct vattr vattr; - int error; - int whiteout; - struct nameidata nd; + user_addr_t sfsp; + user_addr_t *mp; + int count, maxcount; + struct getfsstat_struct fst; - if (error = suser(p->p_ucred, &p->p_acflag)) - return (error); - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); - vp = nd.ni_vp; - if (vp != NULL) - error = EEXIST; + if (IS_64BIT_PROCESS(p)) { + maxcount = uap->bufsize / sizeof(struct user_statfs); + } else { - VATTR_NULL(&vattr); - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - vattr.va_rdev = uap->dev; - whiteout = 0; - - switch (uap->mode & S_IFMT) { - case S_IFMT: /* used by badsect to flag bad sectors */ - vattr.va_type = VBAD; - break; - case S_IFCHR: - vattr.va_type = VCHR; - break; - case S_IFBLK: - vattr.va_type = VBLK; - break; - case S_IFWHT: - whiteout = 1; - break; - default: - error = EINVAL; - break; - } + maxcount = uap->bufsize / sizeof(struct statfs); } - if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, 
LEASE_WRITE); - if (whiteout) { - error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); - if (error) - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); - } else { - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, - &nd.ni_cnd, &vattr); + sfsp = uap->buf; + count = 0; + + mp = NULL; + +#if CONFIG_MACF + if (uap->mac != USER_ADDR_NULL) { + u_int32_t *mp0; + int error; + int i; + + count = (int)(uap->macsize / (IS_64BIT_PROCESS(p) ? 8 : 4)); + if (count != maxcount) + return (EINVAL); + + /* Copy in the array */ + MALLOC(mp0, u_int32_t *, uap->macsize, M_MACTEMP, M_WAITOK); + error = copyin(uap->mac, mp0, uap->macsize); + if (error) + return (error); + + /* Normalize to an array of user_addr_t */ + MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK); + for (i = 0; i < count; i++) { + if (IS_64BIT_PROCESS(p)) + mp[i] = ((user_addr_t *)mp0)[i]; + else + mp[i] = (user_addr_t)mp0[i]; } - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp) - vrele(vp); + FREE(mp0, M_MACTEMP); } - return (error); +#endif + + + fst.sfsp = sfsp; + fst.mp = mp; + fst.flags = uap->flags; + fst.count = 0; + fst.error = 0; + fst.maxcount = maxcount; + + + vfs_iterate(0, getfsstat_callback, &fst); + + if (mp) + FREE(mp, M_MACTEMP); + + if (fst.error ) { + KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error); + return(fst.error); + } + + if (fst.sfsp && fst.count > fst.maxcount) + *retval = fst.maxcount; + else + *retval = fst.count; + return (0); } -/* - * Create a named pipe. 
- */ -struct mkfifo_args { - char *path; - int mode; -}; -/* ARGSUSED */ -int -mkfifo(p, uap, retval) - struct proc *p; - register struct mkfifo_args *uap; - register_t *retval; +static int +getfsstat64_callback(mount_t mp, void * arg) { - struct vattr vattr; + struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg; + struct vfsstatfs *sp; int error; - struct nameidata nd; -#if !FIFO - return (EOPNOTSUPP); -#else - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - return (EEXIST); + if (fstp->sfsp && fstp->count < fstp->maxcount) { + sp = &mp->mnt_vfsstat; + /* + * If MNT_NOWAIT is specified, do not refresh the + * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + */ + if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) && + (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) { + KAUTH_DEBUG("vfs_update_vfsstat returned %d", error); + return(VFS_RETURNED); + } + + error = statfs64_common(mp, sp, fstp->sfsp); + if (error) { + fstp->error = error; + return(VFS_RETURNED_DONE); + } + fstp->sfsp += sizeof(struct statfs64); } - VATTR_NULL(&vattr); - vattr.va_type = VFIFO; - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); -#endif /* FIFO */ + fstp->count++; + return(VFS_RETURNED); } /* - * Make a hard file link. + * Get statistics on all file systems in 64 bit mode. 
*/ -struct link_args { - char *path; - char *link; -}; -/* ARGSUSED */ int -link(p, uap, retval) - struct proc *p; - register struct link_args *uap; - register_t *retval; +getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval) { - register struct vnode *vp; - struct nameidata nd; - int error; + user_addr_t sfsp; + int count, maxcount; + struct getfsstat_struct fst; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); - vp = nd.ni_vp; - if (vp->v_type == VDIR) - error = EPERM; /* POSIX */ - else { - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT; - nd.ni_dirp = uap->link; - if ((error = namei(&nd)) == 0) { - if (nd.ni_vp != NULL) - error = EEXIST; - if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, - LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); - } - } + maxcount = uap->bufsize / sizeof(struct statfs64); + + sfsp = uap->buf; + count = 0; + + fst.sfsp = sfsp; + fst.flags = uap->flags; + fst.count = 0; + fst.error = 0; + fst.maxcount = maxcount; + + vfs_iterate(0, getfsstat64_callback, &fst); + + if (fst.error ) { + KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error); + return(fst.error); } - vrele(vp); - return (error); + + if (fst.sfsp && fst.count > fst.maxcount) + *retval = fst.maxcount; + else + *retval = fst.count; + + return (0); +} + +#if COMPAT_GETFSSTAT +ogetfsstat(proc_t p, struct getfsstat_args *uap, register_t *retval) +{ + return (ENOTSUP); } +#endif /* - * Make a symbolic link. + * Change current working directory to a given file descriptor. 
*/ -struct symlink_args { - char *path; - char *link; -}; /* ARGSUSED */ -int -symlink(p, uap, retval) - struct proc *p; - register struct symlink_args *uap; - register_t *retval; +static int +common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread) { - struct vattr vattr; - char *path; + struct filedesc *fdp = p->p_fd; + vnode_t vp; + vnode_t tdp; + vnode_t tvp; + struct mount *mp; int error; - struct nameidata nd; - size_t dummy=0; - MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - if (error = copyinstr(uap->path, path, MAXPATHLEN, &dummy)) + vfs_context_t ctx = vfs_context_current(); + + if (per_thread && uap->fd == -1) { + /* + * Switching back from per-thread to per process CWD; verify we + * in fact have one before proceeding. The only success case + * for this code path is to return 0 preemptively after zapping + * the thread structure contents. + */ + thread_t th = vfs_context_thread(ctx); + if (th) { + uthread_t uth = get_bsdthread_info(th); + tvp = uth->uu_cdir; + uth->uu_cdir = NULLVP; + if (tvp != NULLVP) { + vnode_rele(tvp); + return (0); + } + } + return (EBADF); + } + + if ( (error = file_vnode(uap->fd, &vp)) ) + return(error); + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + if (vp->v_type != VDIR) { + error = ENOTDIR; goto out; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p); - if (error = namei(&nd)) + } + +#if CONFIG_MACF + error = mac_vnode_check_chdir(ctx, vp); + if (error) goto out; - if (nd.ni_vp) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - error = EEXIST; +#endif + error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx); + if (error) + goto out; + + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, LK_NOWAIT)) { + error = EACCES; + goto out; + } + error = VFS_ROOT(mp, &tdp, ctx); + vfs_unbusy(mp); + if (error) + 
break; + vnode_put(vp); + vp = tdp; + } + if (error) goto out; + if ( (error = vnode_ref(vp)) ) + goto out; + vnode_put(vp); + + if (per_thread) { + thread_t th = vfs_context_thread(ctx); + if (th) { + uthread_t uth = get_bsdthread_info(th); + tvp = uth->uu_cdir; + uth->uu_cdir = vp; + OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag); + } else { + vnode_rele(vp); + return (ENOENT); + } + } else { + proc_fdlock(p); + tvp = fdp->fd_cdir; + fdp->fd_cdir = vp; + proc_fdunlock(p); } - VATTR_NULL(&vattr); - vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); + + if (tvp) + vnode_rele(tvp); + file_drop(uap->fd); + + return (0); out: - FREE_ZONE(path, MAXPATHLEN, M_NAMEI); - return (error); + vnode_put(vp); + file_drop(uap->fd); + + return(error); } -/* - * Delete a whiteout from the filesystem. - */ -struct undelete_args { - char *path; -}; -/* ARGSUSED */ int -undelete(p, uap, retval) - struct proc *p; - register struct undelete_args *uap; - register_t *retval; +fchdir(proc_t p, struct fchdir_args *uap, __unused register_t *retval) { - int error; - struct nameidata nd; - - NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) - return (error); - - if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); - return (EEXIST); - } + return common_fchdir(p, uap, 0); +} - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); - return (error); +int +__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused register_t *retval) +{ + return common_fchdir(p, (void *)uap, 1); } /* - * Delete a name from the filesystem. 
+ * Change current working directory (``.''). + * + * Returns: 0 Success + * change_dir:ENOTDIR + * change_dir:??? + * vnode_ref:ENOENT No such file or directory */ -struct unlink_args { - char *path; -}; /* ARGSUSED */ static int -_unlink(p, uap, retval, nodelbusy) - struct proc *p; - struct unlink_args *uap; - register_t *retval; - int nodelbusy; +common_chdir(proc_t p, struct chdir_args *uap, int per_thread) { - register struct vnode *vp; + struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; + vnode_t tvp; + vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - /* with Carbon semantics, busy files cannot be deleted */ - if (nodelbusy) - nd.ni_cnd.cn_flags |= NODELETEBUSY; - if (error = namei(&nd)) + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = change_dir(&nd, ctx); + if (error) + return (error); + if ( (error = vnode_ref(nd.ni_vp)) ) { + vnode_put(nd.ni_vp); return (error); - vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - - if (vp->v_type == VDIR) - error = EPERM; /* POSIX */ - else { - /* - * The root of a mounted filesystem cannot be deleted. - * - * XXX: can this only be a VDIR case? 
- */ - if (vp->v_flag & VROOT) - error = EBUSY; } - - if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + /* + * drop the iocount we picked up in change_dir + */ + vnode_put(nd.ni_vp); + + if (per_thread) { + thread_t th = vfs_context_thread(ctx); + if (th) { + uthread_t uth = get_bsdthread_info(th); + tvp = uth->uu_cdir; + uth->uu_cdir = nd.ni_vp; + OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag); + } else { + vnode_rele(nd.ni_vp); + return (ENOENT); + } } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); + proc_fdlock(p); + tvp = fdp->fd_cdir; + fdp->fd_cdir = nd.ni_vp; + proc_fdunlock(p); } - return (error); + + if (tvp) + vnode_rele(tvp); + + return (0); } -/* - * Delete a name from the filesystem using POSIX semantics. - */ int -unlink(p, uap, retval) - struct proc *p; - struct unlink_args *uap; - register_t *retval; +chdir(proc_t p, struct chdir_args *uap, __unused register_t *retval) { - return _unlink(p, uap, retval, 0); + return common_chdir(p, (void *)uap, 0); } -/* - * Delete a name from the filesystem using Carbon semantics. - */ int -delete(p, uap, retval) - struct proc *p; - struct unlink_args *uap; - register_t *retval; +__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t *retval) { - return _unlink(p, uap, retval, 1); + return common_chdir(p, (void *)uap, 1); } + /* - * Reposition read/write file offset. + * Change notion of root (``/'') directory. 
*/ -struct lseek_args { - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t offset; - int whence; -}; +/* ARGSUSED */ int -lseek(p, uap, retval) - struct proc *p; - register struct lseek_args *uap; - register_t *retval; +chroot(proc_t p, struct chroot_args *uap, __unused register_t *retval) { - struct ucred *cred = p->p_ucred; - struct file *fp; - struct vattr vattr; + struct filedesc *fdp = p->p_fd; int error; + struct nameidata nd; + vnode_t tvp; + vfs_context_t ctx = vfs_context_current(); - if (error = fdgetf(p, uap->fd, &fp)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); - if (fp->f_type != DTYPE_VNODE) - return (ESPIPE); - switch (uap->whence) { - case L_INCR: - fp->f_offset += uap->offset; - break; - case L_XTND: - if (error = - VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p)) - return (error); - fp->f_offset = uap->offset + vattr.va_size; - break; - case L_SET: - fp->f_offset = uap->offset; - break; - default: - return (EINVAL); - } - *(off_t *)retval = fp->f_offset; - return (0); -} -#if COMPAT_43 -/* - * Reposition read/write file offset. 
- */ -struct olseek_args { - int fd; - long offset; - int whence; -}; -int -olseek(p, uap, retval) - struct proc *p; - register struct olseek_args *uap; - register_t *retval; -{ - struct lseek_args /* { - syscallarg(int) fd; -#ifdef DOUBLE_ALIGN_PARAMS - syscallarg(int) pad; + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = change_dir(&nd, ctx); + if (error) + return (error); + +#if CONFIG_MACF + error = mac_vnode_check_chroot(ctx, nd.ni_vp, + &nd.ni_cnd); + if (error) { + vnode_put(nd.ni_vp); + return (error); + } #endif - syscallarg(off_t) offset; - syscallarg(int) whence; - } */ nuap; - off_t qret; - int error; - nuap.fd = uap->fd; - nuap.offset = uap->offset; - nuap.whence = uap->whence; - error = lseek(p, &nuap, &qret); - *(long *)retval = qret; - return (error); -} -#endif /* COMPAT_43 */ + if ( (error = vnode_ref(nd.ni_vp)) ) { + vnode_put(nd.ni_vp); + return (error); + } + vnode_put(nd.ni_vp); -/* - * Check access permissions. - */ -struct access_args { - char *path; - int flags; -}; -int -access(p, uap, retval) - struct proc *p; - register struct access_args *uap; - register_t *retval; -{ - register struct ucred *cred = p->p_ucred; - register struct vnode *vp; - int error, flags, t_gid, t_uid; - struct nameidata nd; + proc_fdlock(p); + tvp = fdp->fd_rdir; + fdp->fd_rdir = nd.ni_vp; + fdp->fd_flags |= FD_CHROOT; + proc_fdunlock(p); - t_uid = cred->cr_uid; - t_gid = cred->cr_groups[0]; - cred->cr_uid = p->p_cred->p_ruid; - cred->cr_groups[0] = p->p_cred->p_rgid; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) - goto out1; - vp = nd.ni_vp; + if (tvp != NULL) + vnode_rele(tvp); - /* Flags == 0 means only check for existence. 
*/ - if (uap->flags) { - flags = 0; - if (uap->flags & R_OK) - flags |= VREAD; - if (uap->flags & W_OK) - flags |= VWRITE; - if (uap->flags & X_OK) - flags |= VEXEC; - if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) - error = VOP_ACCESS(vp, flags, cred, p); - } - vput(vp); -out1: - cred->cr_uid = t_uid; - cred->cr_groups[0] = t_gid; - return (error); + return (0); } -#if COMPAT_43 /* - * Get file status; this version follows links. + * Common routine for chroot and chdir. + * + * Returns: 0 Success + * ENOTDIR Not a directory + * namei:??? [anything namei can return] + * vnode_authorize:??? [anything vnode_authorize can return] */ -struct ostat_args { - char *path; - struct ostat *ub; -}; -/* ARGSUSED */ -int -ostat(p, uap, retval) - struct proc *p; - register struct ostat_args *uap; - register_t *retval; +static int +change_dir(struct nameidata *ndp, vfs_context_t ctx) { - struct stat sb; - struct ostat osb; + vnode_t vp; int error; - struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) + if ((error = namei(ndp))) return (error); - error = vn_stat(nd.ni_vp, &sb, p); - vput(nd.ni_vp); - if (error) + nameidone(ndp); + vp = ndp->ni_vp; + + if (vp->v_type != VDIR) { + vnode_put(vp); + return (ENOTDIR); + } + +#if CONFIG_MACF + error = mac_vnode_check_chdir(ctx, vp); + if (error) { + vnode_put(vp); + return (error); + } +#endif + + error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx); + if (error) { + vnode_put(vp); return (error); - cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + } + return (error); } /* - * Get file status; this version does not follow links. + * Check permissions, allocate an open file structure, + * and call the device open routine if any. + * + * Returns: 0 Success + * EINVAL + * EINTR + * falloc:ENFILE + * falloc:EMFILE + * falloc:ENOMEM + * vn_open_auth:??? + * dupfdopen:??? + * VNOP_ADVLOCK:??? + * vnode_setsize:??? 
*/ -struct olstat_args { - char *path; - struct ostat *ub; -}; -/* ARGSUSED */ +#warning XXX implement uid, gid int -olstat(p, uap, retval) - struct proc *p; - register struct olstat_args *uap; - register_t *retval; +open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, register_t *retval) { - struct vnode *vp, *dvp; - struct stat sb, sb1; - struct ostat osb; - int error; - struct nameidata nd; + proc_t p = vfs_context_proc(ctx); + uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx)); + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + vnode_t vp; + int flags, oflags; + struct fileproc *nfp; + int type, indx, error; + struct flock lf; + int no_controlling_tty = 0; + int deny_controlling_tty = 0; + struct session *sessp = SESSION_NULL; + struct vfs_context context = *vfs_context_current(); /* local copy */ + + oflags = uflags; + + if ((oflags & O_ACCMODE) == O_ACCMODE) + return(EINVAL); + flags = FFLAGS(uflags); + + AUDIT_ARG(fflags, oflags); + AUDIT_ARG(mode, vap->va_mode); - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) + if ( (error = falloc(p, &nfp, &indx, ctx)) ) { return (error); - /* - * For symbolic links, always return the attributes of its - * containing directory, except for mode, size, and links. - */ - vp = nd.ni_vp; - dvp = nd.ni_dvp; - if (vp->v_type != VLNK) { - if (dvp == vp) - vrele(dvp); - else - vput(dvp); - error = vn_stat(vp, &sb, p); - vput(vp); - if (error) - return (error); - } else { - error = vn_stat(dvp, &sb, p); - vput(dvp); - if (error) { - vput(vp); - return (error); + } + fp = nfp; + uu->uu_dupfd = -indx - 1; + + if (!(p->p_flag & P_CONTROLT)) { + sessp = proc_session(p); + no_controlling_tty = 1; + /* + * If conditions would warrant getting a controlling tty if + * the device being opened is a tty (see ttyopen in tty.c), + * but the open flags deny it, set a flag in the session to + * prevent it. 
+ */ + if (SESS_LEADER(p, sessp) && + sessp->s_ttyvp == NULL && + (flags & O_NOCTTY)) { + session_lock(sessp); + sessp->s_flags |= S_NOCTTY; + session_unlock(sessp); + deny_controlling_tty = 1; + } + } + + if ((error = vn_open_auth(ndp, &flags, vap))) { + if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */ + if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) { + fp_drop(p, indx, NULL, 0); + *retval = indx; + if (deny_controlling_tty) { + session_lock(sessp); + sessp->s_flags &= ~S_NOCTTY; + session_unlock(sessp); + } + if (sessp != SESSION_NULL) + session_rele(sessp); + return (0); + } } - error = vn_stat(vp, &sb1, p); - vput(vp); + if (error == ERESTART) + error = EINTR; + fp_free(p, indx, fp); + + if (deny_controlling_tty) { + session_lock(sessp); + sessp->s_flags &= ~S_NOCTTY; + session_unlock(sessp); + } + if (sessp != SESSION_NULL) + session_rele(sessp); + return (error); + } + uu->uu_dupfd = 0; + vp = ndp->ni_vp; + + fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY); + fp->f_fglob->fg_type = DTYPE_VNODE; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)vp; + + if (flags & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (flags & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((flags & FNONBLOCK) == 0) + type |= F_WAIT; +#if CONFIG_MACF + error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob, + F_SETLK, &lf); if (error) - return (error); - sb.st_mode &= ~S_IFDIR; - sb.st_mode |= S_IFLNK; - sb.st_nlink = sb1.st_nlink; - sb.st_size = sb1.st_size; - sb.st_blocks = sb1.st_blocks; - } - cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + goto bad; +#endif + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) + goto bad; + fp->f_fglob->fg_flag |= FHASLOCK; + } + + /* try to truncate by setting the size attribute */ + if ((flags & O_TRUNC) && 
((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) + goto bad; + + /* + * If the open flags denied the acquisition of a controlling tty, + * clear the flag in the session structure that prevented the lower + * level code from assigning one. + */ + if (deny_controlling_tty) { + session_lock(sessp); + sessp->s_flags &= ~S_NOCTTY; + session_unlock(sessp); + } + + /* + * If a controlling tty was set by the tty line discipline, then we + * want to set the vp of the tty into the session structure. We have + * a race here because we can't get to the vp for the tp in ttyopen, + * because it's not passed as a parameter in the open path. + */ + if (no_controlling_tty && (p->p_flag & P_CONTROLT)) { + vnode_t ttyvp; + vnode_ref(vp); + session_lock(sessp); + ttyvp = sessp->s_ttyvp; + sessp->s_ttyvp = vp; + sessp->s_ttyvid = vnode_vid(vp); + session_unlock(sessp); + if (ttyvp != NULLVP) + vnode_rele(ttyvp); + } + + vnode_put(vp); + + proc_fdlock(p); + procfdtbl_releasefd(p, indx, NULL); + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + + *retval = indx; + + if (sessp != SESSION_NULL) + session_rele(sessp); + return (0); +bad: + if (deny_controlling_tty) { + session_lock(sessp); + sessp->s_flags &= ~S_NOCTTY; + session_unlock(sessp); + } + if (sessp != SESSION_NULL) + session_rele(sessp); + + /* Modify local copy (to not damage thread copy) */ + context.vc_ucred = fp->f_fglob->fg_cred; + + vn_close(vp, fp->f_fglob->fg_flag, &context); + vnode_put(vp); + fp_free(p, indx, fp); + return (error); -} -/* - * Convert from an old to a new stat structure. 
- */ -void -cvtstat(st, ost) - struct stat *st; - struct ostat *ost; -{ - - ost->st_dev = st->st_dev; - ost->st_ino = st->st_ino; - ost->st_mode = st->st_mode; - ost->st_nlink = st->st_nlink; - ost->st_uid = st->st_uid; - ost->st_gid = st->st_gid; - ost->st_rdev = st->st_rdev; - if (st->st_size < (quad_t)1 << 32) - ost->st_size = st->st_size; - else - ost->st_size = -2; - ost->st_atime = st->st_atime; - ost->st_mtime = st->st_mtime; - ost->st_ctime = st->st_ctime; - ost->st_blksize = st->st_blksize; - ost->st_blocks = st->st_blocks; - ost->st_flags = st->st_flags; - ost->st_gen = st->st_gen; } -#endif /* COMPAT_43 */ /* - * Get file status; this version follows links. + * An open system call using an extended argument list compared to the regular + * system call 'open'. + * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval Pointer to an area to receive the + * return calue from the system call + * + * Indirect: uap->path Path to open (same as 'open') + * uap->flags Flags to open (same as 'open' + * uap->uid UID to set, if creating + * uap->gid GID to set, if creating + * uap->mode File mode, if creating (same as 'open') + * uap->xsecurity ACL to set, if creating + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'va', if any, is in host byte order. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
*/ -struct stat_args { - char *path; - struct stat *ub; -}; -/* ARGSUSED */ int -stat(p, uap, retval) - struct proc *p; - register struct stat_args *uap; - register_t *retval; +open_extended(proc_t p, struct open_extended_args *uap, register_t *retval) { - struct stat sb; - int error; + struct filedesc *fdp = p->p_fd; + int ciferror; + kauth_filesec_t xsecdst; + struct vnode_attr va; struct nameidata nd; - - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) - return (error); - error = vn_stat(nd.ni_vp, &sb, p); - vput(nd.ni_vp); - if (error) - return (error); - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); - return (error); + int cmode; + + xsecdst = NULL; + if ((uap->xsecurity != USER_ADDR_NULL) && + ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) + return ciferror; + + VATTR_INIT(&va); + cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + VATTR_SET(&va, va_mode, cmode); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + if (xsecdst != NULL) + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current()); + + ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval); + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + + return ciferror; } -/* - * Get file status; this version does not follow links. 
- */ -struct lstat_args { - char *path; - struct stat *ub; -}; -/* ARGSUSED */ int -lstat(p, uap, retval) - struct proc *p; - register struct lstat_args *uap; - register_t *retval; +open(proc_t p, struct open_args *uap, register_t *retval) { - int error; - struct vnode *vp, *dvp; - struct stat sb, sb1; - struct nameidata nd; - - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) - return (error); - /* - * For symbolic links, always return the attributes of its containing - * directory, except for mode, size, inode number, and links. - */ - vp = nd.ni_vp; - dvp = nd.ni_dvp; - if ((vp->v_type != VLNK) || ((vp->v_type == VLNK) && (vp->v_tag == VT_NFS))) { - if (dvp == vp) - vrele(dvp); - else - vput(dvp); - error = vn_stat(vp, &sb, p); - vput(vp); - if (error) - return (error); - if (vp->v_type == VLNK) - sb.st_mode |= S_IFLNK; - } else { - error = vn_stat(dvp, &sb, p); - vput(dvp); - if (error) { - vput(vp); - return (error); - } - error = vn_stat(vp, &sb1, p); - vput(vp); - if (error) - return (error); - sb.st_mode &= ~S_IFDIR; - sb.st_mode |= S_IFLNK; - sb.st_nlink = sb1.st_nlink; - sb.st_size = sb1.st_size; - sb.st_blocks = sb1.st_blocks; - sb.st_ino = sb1.st_ino; - } - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); - return (error); + __pthread_testcancel(1); + return(open_nocancel(p, (struct open_nocancel_args *)uap, retval)); } -/* - * Get configurable pathname variables. 
- */ -struct pathconf_args { - char *path; - int name; -}; -/* ARGSUSED */ + int -pathconf(p, uap, retval) - struct proc *p; - register struct pathconf_args *uap; - register_t *retval; +open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval) { - int error; + struct filedesc *fdp = p->p_fd; + struct vnode_attr va; struct nameidata nd; + int cmode; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) - return (error); - error = VOP_PATHCONF(nd.ni_vp, uap->name, retval); - vput(nd.ni_vp); - return (error); + VATTR_INIT(&va); + /* Mask off all but regular access permissions */ + cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current()); + + return(open1(vfs_context_current(), &nd, uap->flags, &va, retval)); } + /* - * Return target name of a symbolic link. + * Create a special file. 
*/ -struct readlink_args { - char *path; - char *buf; - int count; -}; -/* ARGSUSED */ +static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap); + int -readlink(p, uap, retval) - struct proc *p; - register struct readlink_args *uap; - register_t *retval; +mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct iovec aiov; - struct uio auio; + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); int error; + int whiteout = 0; struct nameidata nd; + vnode_t vp, dvp; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + VATTR_SET(&va, va_rdev, uap->dev); + + /* If it's a mknod() of a FIFO, call mkfifo1() instead */ + if ((uap->mode & S_IFMT) == S_IFIFO) + return(mkfifo1(ctx, uap->path, &va)); + + AUDIT_ARG(mode, uap->mode); + AUDIT_ARG(dev, uap->dev); - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) + if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) return (error); + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); + dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp->v_type != VLNK) + + if (vp != NULL) { + error = EEXIST; + goto out; + } + + switch (uap->mode & S_IFMT) { + case S_IFMT: /* used by badsect to flag bad sectors */ + VATTR_SET(&va, va_type, VBAD); + break; + case S_IFCHR: + VATTR_SET(&va, va_type, VCHR); + break; + case S_IFBLK: + VATTR_SET(&va, va_type, VBLK); + break; + case S_IFWHT: + whiteout = 1; + break; + default: error = EINVAL; - else { - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->count; - error = VOP_READLINK(vp, &auio, p->p_ucred); - } - vput(vp); - *retval = uap->count - auio.uio_resid; + goto 
out; + } + +#if CONFIG_MACF + if (!whiteout) { + error = mac_vnode_check_create(ctx, + nd.ni_dvp, &nd.ni_cnd, &va); + if (error) + goto out; + } +#endif + + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) + goto out; + + if (whiteout) { + error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx); + } else { + error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx); + } + if (error) + goto out; + + if (vp) { + int update_flags = 0; + + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); + +#if CONFIG_FSE + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); +#endif + } + +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + return (error); } /* - * Change flags of a file given a path name. + * Create a named pipe. + * + * Returns: 0 Success + * EEXIST + * namei:??? + * vnode_authorize:??? + * vn_create:??? 
*/ -struct chflags_args { - char *path; - int flags; -}; -/* ARGSUSED */ -int -chflags(p, uap, retval) - struct proc *p; - register struct chflags_args *uap; - register_t *retval; +static int +mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap) { - register struct vnode *vp; - struct vattr vattr; + vnode_t vp, dvp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, upath, ctx); + error = namei(&nd); + if (error) return (error); + dvp = nd.ni_dvp; vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - vput(vp); - return (error); + + /* check that this is a new file and authorize addition */ + if (vp != NULL) { + error = EEXIST; + goto out; + } + VATTR_SET(vap, va_type, VFIFO); + +#if CONFIG_MACF + error = mac_vnode_check_create(ctx, nd.ni_dvp, + &nd.ni_cnd, vap); + if (error) + goto out; +#endif + + + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) + goto out; + + + error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx); +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + + return error; } + /* - * Change flags of a file given a file descriptor. + * A mkfifo system call using an extended argument list compared to the regular + * system call 'mkfifo'. 
+ * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval (Ignored) + * + * Indirect: uap->path Path to fifo (same as 'mkfifo') + * uap->uid UID to set + * uap->gid GID to set + * uap->mode File mode to set (same as 'mkfifo') + * uap->xsecurity ACL to set, if creating + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'va', if any, is in host byte order. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. */ -struct fchflags_args { - int fd; - int flags; -}; -/* ARGSUSED */ int -fchflags(p, uap, retval) - struct proc *p; - register struct fchflags_args *uap; - register_t *retval; +mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t *retval) { - struct vattr vattr; - struct vnode *vp; - struct file *fp; - int error; + int ciferror; + kauth_filesec_t xsecdst; + struct vnode_attr va; + + xsecdst = KAUTH_FILESEC_NONE; + if (uap->xsecurity != USER_ADDR_NULL) { + if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return ciferror; + } - if (error = getvnode(p, uap->fd, &fp)) - return (error); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp, 0, p); - return (error); -} + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + if (xsecdst != KAUTH_FILESEC_NONE) + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); -/* - * Change mode of a file given path name. 
- */ -struct chmod_args { - char *path; - int mode; -}; -/* ARGSUSED */ -int -chmod(p, uap, retval) - struct proc *p; - register struct chmod_args *uap; - register_t *retval; -{ - register struct vnode *vp; - struct vattr vattr; - int error; - struct nameidata nd; + ciferror = mkfifo1(vfs_context_current(), uap->path, &va); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); - vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - vput(vp); - return (error); + if (xsecdst != KAUTH_FILESEC_NONE) + kauth_filesec_free(xsecdst); + return ciferror; } -/* - * Change mode of a file given a file descriptor. - */ -struct fchmod_args { - int fd; - int mode; -}; /* ARGSUSED */ int -fchmod(p, uap, retval) - struct proc *p; - register struct fchmod_args *uap; - register_t *retval; +mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval) { - struct vattr vattr; - struct vnode *vp; - struct file *fp; - int error; + struct vnode_attr va; - if (error = getvnode(p, uap->fd, &fp)) - return (error); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp, 0, p); - return (error); + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + + return(mkfifo1(vfs_context_current(), uap->path, &va)); } /* - * Set ownership given a path name. + * Make a hard file link. + * + * Returns: 0 Success + * EPERM + * EEXIST + * EXDEV + * namei:??? + * vnode_authorize:??? + * VNOP_LINK:??? 
*/ -struct chown_args { - char *path; - int uid; - int gid; -}; /* ARGSUSED */ int -chown(p, uap, retval) - struct proc *p; - register struct chown_args *uap; - register_t *retval; +link(__unused proc_t p, struct link_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct vattr vattr; - int error; + vnode_t vp, dvp, lvp; struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + int error; + fse_info finfo; + int need_event, has_listeners; + char *target_path = NULL; + + vp = dvp = lvp = NULLVP; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + /* look up the object we are linking to */ + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); vp = nd.ni_vp; + nameidone(&nd); + /* - * XXX A TEMPORARY HACK FOR NOW: Try to track console_user - * by looking for chown() calls on /dev/console from a console process. + * Normally, linking to directories is not supported. + * However, some file systems may have limited support. */ - if ((vp) && (vp->v_specinfo) && - (major(vp->v_specinfo->si_rdev) == CONSMAJOR) && - (minor(vp->v_specinfo->si_rdev) == 0)) { - console_user = uap->uid; - }; - - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - vput(vp); - return (error); -} + if (vp->v_type == VDIR) { + if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) { + error = EPERM; /* POSIX */ + goto out; + } + /* Linking to a directory requires ownership. 
*/ + if (!kauth_cred_issuser(vfs_context_ucred(ctx))) { + struct vnode_attr dva; + + VATTR_INIT(&dva); + VATTR_WANTED(&dva, va_uid); + if (vnode_getattr(vp, &dva, ctx) != 0 || + !VATTR_IS_SUPPORTED(&dva, va_uid) || + (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) { + error = EACCES; + goto out; + } + } + } -/* - * Set ownership given a file descriptor. - */ -struct fchown_args { - int fd; - int uid; - int gid; -}; -/* ARGSUSED */ -int -fchown(p, uap, retval) - struct proc *p; - register struct fchown_args *uap; - register_t *retval; -{ - struct vattr vattr; - struct vnode *vp; - struct file *fp; - int error; + /* lookup the target node */ + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK; + nd.ni_dirp = uap->link; + error = namei(&nd); + if (error != 0) + goto out; + dvp = nd.ni_dvp; + lvp = nd.ni_vp; - if (error = getvnode(p, uap->fd, &fp)) - return (error); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp, 0, p); +#if CONFIG_MACF + if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) + goto out2; +#endif + + /* or to anything that kauth doesn't want us to (eg. 
immutable items) */ + if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) + goto out2; + + /* target node must not exist */ + if (lvp != NULLVP) { + error = EEXIST; + goto out2; + } + /* cannot link across mountpoints */ + if (vnode_mount(vp) != vnode_mount(dvp)) { + error = EXDEV; + goto out2; + } + + /* authorize creation of the target note */ + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) + goto out2; + + /* and finally make the link */ + error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx); + if (error) + goto out2; + +#if CONFIG_FSE + need_event = need_fsevent(FSE_CREATE_FILE, dvp); +#else + need_event = 0; +#endif + has_listeners = kauth_authorize_fileop_has_listeners(); + + if (need_event || has_listeners) { + char *link_to_path = NULL; + int len, link_name_len; + + /* build the path to the new link file */ + GET_PATH(target_path); + if (target_path == NULL) { + error = ENOMEM; + goto out2; + } + + len = MAXPATHLEN; + vn_getpath(dvp, target_path, &len); + if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { + target_path[len-1] = '/'; + strlcpy(&target_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len); + len += nd.ni_cnd.cn_namelen; + } + + if (has_listeners) { + /* build the path to file we are linking to */ + GET_PATH(link_to_path); + if (link_to_path == NULL) { + error = ENOMEM; + goto out2; + } + + link_name_len = MAXPATHLEN; + vn_getpath(vp, link_to_path, &link_name_len); + + /* + * Call out to allow 3rd party notification of rename. + * Ignore result of kauth_authorize_fileop call. 
+ */ + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, + (uintptr_t)link_to_path, (uintptr_t)target_path); + if (link_to_path != NULL) { + RELEASE_PATH(link_to_path); + } + } +#if CONFIG_FSE + if (need_event) { + /* construct fsevent */ + if (get_fse_info(vp, &finfo, ctx) == 0) { + // build the path to the destination of the link + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_STRING, len, target_path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + } + } +#endif + } +out2: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + if (target_path != NULL) { + RELEASE_PATH(target_path); + } +out: + if (lvp) + vnode_put(lvp); + if (dvp) + vnode_put(dvp); + vnode_put(vp); return (error); } /* - * Set the access and modification times of a file. + * Make a symbolic link. + * + * We could add support for ACLs here too... */ -struct utimes_args { - char *path; - struct timeval *tptr; -}; /* ARGSUSED */ int -utimes(p, uap, retval) - struct proc *p; - register struct utimes_args *uap; - register_t *retval; +symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct timeval tv[2]; - struct vattr vattr; + struct vnode_attr va; + char *path; int error; struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + vnode_t vp, dvp; + size_t dummy=0; + + MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + error = copyinstr(uap->path, path, MAXPATHLEN, &dummy); + if (error) + goto out; + AUDIT_ARG(text, path); /* This is the link string */ - VATTR_NULL(&vattr); - if (uap->tptr == NULL) { - microtime(&tv[0]); - tv[1] = tv[0]; - vattr.va_vaflags |= VA_UTIMES_NULL; - } else if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, - sizeof (tv))) - return (error); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + 
UIO_USERSPACE, uap->link, ctx); + error = namei(&nd); + if (error) + goto out; + dvp = nd.ni_dvp; vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - vattr.va_atime.tv_sec = tv[0].tv_sec; - vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; - vattr.va_mtime.tv_sec = tv[1].tv_sec; - vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - vput(vp); + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VLNK); + VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask); +#if CONFIG_MACF + error = mac_vnode_check_create(ctx, + dvp, &nd.ni_cnd, &va); +#endif + if (error != 0) { + goto skipit; + } + + if (vp != NULL) { + error = EEXIST; + goto skipit; + } + + /* authorize */ + if (error == 0) + error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx); + /* get default ownership, etc. */ + if (error == 0) + error = vnode_authattr_new(dvp, &va, 0, ctx); + if (error == 0) + error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx); + + /* do fallback attribute handling */ + if (error == 0) + error = vnode_setattr_fallback(vp, &va, ctx); + + if (error == 0) { + int update_flags = 0; + + if (vp == NULL) { + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags = 0; + error = namei(&nd); + vp = nd.ni_vp; + + if (vp == NULL) + goto skipit; + } + +#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */ + /* call out to allow 3rd party notification of rename. + * Ignore result of kauth_authorize_fileop call. 
+ */ + if (kauth_authorize_fileop_has_listeners() && + namei(&nd) == 0) { + char *new_link_path = NULL; + int len; + + /* build the path to the new link file */ + new_link_path = get_pathbuff(); + len = MAXPATHLEN; + vn_getpath(dvp, new_link_path, &len); + if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { + new_link_path[len - 1] = '/'; + strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len); + } + + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK, + (uintptr_t)path, (uintptr_t)new_link_path); + if (new_link_path != NULL) + release_pathbuff(new_link_path); + } +#endif + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); + +#if CONFIG_FSE + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); +#endif + } + +skipit: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); +out: + FREE_ZONE(path, MAXPATHLEN, M_NAMEI); + return (error); } /* - * Truncate a file given its path name. + * Delete a whiteout from the filesystem. 
*/ -struct truncate_args { - char *path; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t length; -}; /* ARGSUSED */ +#warning XXX authorization not implmented for whiteouts int -truncate(p, uap, retval) - struct proc *p; - register struct truncate_args *uap; - register_t *retval; +undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct vattr vattr; int error; struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + vnode_t vp, dvp; - if (uap->length < 0) - return(EINVAL); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); + dvp = nd.ni_dvp; vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) - error = EISDIR; - else if ((error = vn_writechk(vp)) == 0 && - (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { - VATTR_NULL(&vattr); - vattr.va_size = uap->length; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - } - vput(vp); + + if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) { + error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx); + } else + error = EEXIST; + + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + return (error); } /* - * Truncate a file given a file descriptor. + * Delete a name from the filesystem. 
*/ -struct ftruncate_args { - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t length; -}; /* ARGSUSED */ int -ftruncate(p, uap, retval) - struct proc *p; - register struct ftruncate_args *uap; - register_t *retval; +unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) { - struct vattr vattr; - struct vnode *vp; - struct file *fp; + vnode_t vp, dvp; int error; - - if (uap->length < 0) - return(EINVAL); - - if (error = fdgetf(p, uap->fd, &fp)) + struct componentname *cnp; + char *path = NULL; + int len; + fse_info finfo; + int flags = 0; + int need_event = 0; + int has_listeners = 0; + + ndp->ni_cnd.cn_flags |= LOCKPARENT; + cnp = &ndp->ni_cnd; + + error = namei(ndp); + if (error) return (error); + dvp = ndp->ni_dvp; + vp = ndp->ni_vp; - if (fp->f_type == DTYPE_PSXSHM) { - return(pshm_truncate(p, fp, uap->fd, uap->length, retval)); + /* With Carbon delete semantics, busy files cannot be deleted */ + if (nodelbusy) { + flags |= VNODE_REMOVE_NODELETEBUSY; } - if (fp->f_type != DTYPE_VNODE) - return (EINVAL); - if ((fp->f_flag & FWRITE) == 0) - return (EINVAL); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) - error = EISDIR; - else if ((error = vn_writechk(vp)) == 0) { - VATTR_NULL(&vattr); - vattr.va_size = uap->length; - error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); + /* + * Normally, unlinking of directories is not supported. + * However, some file systems may have limited support. + */ + if ((vp->v_type == VDIR) && + !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) { + error = EPERM; /* POSIX */ } - VOP_UNLOCK(vp, 0, p); - return (error); -} -#if COMPAT_43 -/* - * Truncate a file given its path name. 
- */ -struct otruncate_args { - char *path; - long length; -}; -/* ARGSUSED */ -int -otruncate(p, uap, retval) - struct proc *p; - register struct otruncate_args *uap; - register_t *retval; -{ - struct truncate_args /* { - syscallarg(char *) path; -#ifdef DOUBLE_ALIGN_PARAMS - syscallarg(int) pad; + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) { + error = EBUSY; + } + if (error) + goto out; + + + /* authorize the delete operation */ +#if CONFIG_MACF + if (!error) + error = mac_vnode_check_unlink(ctx, + dvp, vp, cnp); +#endif /* MAC */ + if (!error) + error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx); + if (error) + goto out; + +#if CONFIG_FSE + need_event = need_fsevent(FSE_DELETE, dvp); + if (need_event) { + if ((vp->v_flag & VISHARDLINK) == 0) { + get_fse_info(vp, &finfo, ctx); + } + } +#endif + has_listeners = kauth_authorize_fileop_has_listeners(); + if (need_event || has_listeners) { + GET_PATH(path); + if (path == NULL) { + error = ENOMEM; + goto out; + } + len = MAXPATHLEN; + vn_getpath(vp, path, &len); + } + +#if NAMEDRSRCFORK + if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) + error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx); + else +#endif + error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx); + + /* + * Call out to allow 3rd party notification of delete. + * Ignore result of kauth_authorize_fileop call. + */ + if (!error) { + if (has_listeners) { + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_DELETE, + (uintptr_t)vp, + (uintptr_t)path); + } + + if (vp->v_flag & VISHARDLINK) { + // + // if a hardlink gets deleted we want to blow away the + // v_parent link because the path that got us to this + // instance of the link is no longer valid. this will + // force the next call to get the path to ask the file + // system instead of just following the v_parent link. 
+ // + vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT); + } + +#if CONFIG_FSE + if (need_event) { + if (vp->v_flag & VISHARDLINK) { + get_fse_info(vp, &finfo, ctx); + } + add_fsevent(FSE_DELETE, ctx, + FSE_ARG_STRING, len, path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + } #endif - syscallarg(off_t) length; - } */ nuap; + } + if (path != NULL) + RELEASE_PATH(path); - nuap.path = uap->path; - nuap.length = uap->length; - return (truncate(p, &nuap, retval)); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ +out: + nameidone(ndp); + vnode_put(dvp); + vnode_put(vp); + return (error); } /* - * Truncate a file given a file descriptor. + * Delete a name from the filesystem using POSIX semantics. */ -struct oftruncate_args { - int fd; - long length; -}; -/* ARGSUSED */ int -oftruncate(p, uap, retval) - struct proc *p; - register struct oftruncate_args *uap; - register_t *retval; +unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval) { - struct ftruncate_args /* { - syscallarg(int) fd; -#ifdef DOUBLE_ALIGN_PARAMS - syscallarg(int) pad; -#endif - syscallarg(off_t) length; - } */ nuap; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); - nuap.fd = uap->fd; - nuap.length = uap->length; - return (ftruncate(p, &nuap, retval)); + NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); + return unlink1(ctx, &nd, 0); } -#endif /* COMPAT_43 */ /* - * Sync an open file. + * Delete a name from the filesystem using Carbon semantics. 
*/ -struct fsync_args { - int fd; -}; -/* ARGSUSED */ int -fsync(p, uap, retval) - struct proc *p; - struct fsync_args *uap; - register_t *retval; +delete(__unused proc_t p, struct delete_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct file *fp; - int error; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); - if (error = getvnode(p, uap->fd, &fp)) - return (error); - vp = (struct vnode *)fp->f_data; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); - VOP_UNLOCK(vp, 0, p); - return (error); + NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); + return unlink1(ctx, &nd, 1); } /* - * Duplicate files. Source must be a file, target must be a file or - * must not exist. + * Reposition read/write file offset. */ - -struct copyfile_args { - char *from; - char *to; - int mode; - int flags; -}; -/* ARGSUSED */ int -copyfile(p, uap, retval) - struct proc *p; - register struct copyfile_args *uap; - register_t *retval; +lseek(proc_t p, struct lseek_args *uap, off_t *retval) { - register struct vnode *tvp, *fvp, *tdvp; - register struct ucred *cred = p->p_ucred; - struct nameidata fromnd, tond; + struct fileproc *fp; + vnode_t vp; + struct vfs_context *ctx; + off_t offset = uap->offset, file_size; int error; - - /* Check that the flags are valid. 
- */ - - if (uap->flags & ~CPF_MASK) { - return(EINVAL); - } - NDINIT(&fromnd, LOOKUP, SAVESTART, UIO_USERSPACE, - uap->from, p); - if (error = namei(&fromnd)) + if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) { + if (error == ENOTSUP) + return (ESPIPE); return (error); - fvp = fromnd.ni_vp; - - NDINIT(&tond, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, - UIO_USERSPACE, uap->to, p); - if (error = namei(&tond)) { - vrele(fvp); - goto out1; } - tdvp = tond.ni_dvp; + if (vnode_isfifo(vp)) { + file_drop(uap->fd); + return(ESPIPE); + } + + + ctx = vfs_context_current(); +#if CONFIG_MACF + if (uap->whence == L_INCR && uap->offset == 0) + error = mac_file_check_get_offset(vfs_context_ucred(ctx), + fp->f_fglob); + else + error = mac_file_check_change_offset(vfs_context_ucred(ctx), + fp->f_fglob); + if (error) { + file_drop(uap->fd); + return (error); + } +#endif + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + + switch (uap->whence) { + case L_INCR: + offset += fp->f_fglob->fg_offset; + break; + case L_XTND: + if ((error = vnode_size(vp, &file_size, ctx)) != 0) + break; + offset += file_size; + break; + case L_SET: + break; + default: + error = EINVAL; + } + if (error == 0) { + if (uap->offset > 0 && offset < 0) { + /* Incremented/relative move past max size */ + error = EOVERFLOW; + } else { + /* + * Allow negative offsets on character devices, per + * POSIX 1003.1-2001. Most likely for writing disk + * labels. + */ + if (offset < 0 && vp->v_type != VCHR) { + /* Decremented/relative move before start */ + error = EINVAL; + } else { + /* Success */ + fp->f_fglob->fg_offset = offset; + *retval = fp->f_fglob->fg_offset; + } + } + } + (void)vnode_put(vp); + file_drop(uap->fd); + return (error); +} + + +/* + * Check access permissions. + * + * Returns: 0 Success + * vnode_authorize:??? 
+ */ +static int +access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) +{ + kauth_action_t action; + int error; + + /* + * If just the regular access bits, convert them to something + * that vnode_authorize will understand. + */ + if (!(uflags & _ACCESS_EXTENDED_MASK)) { + action = 0; + if (uflags & R_OK) + action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */ + if (uflags & W_OK) { + if (vnode_isdir(vp)) { + action |= KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY; + /* might want delete rights here too */ + } else { + action |= KAUTH_VNODE_WRITE_DATA; + } + } + if (uflags & X_OK) { + if (vnode_isdir(vp)) { + action |= KAUTH_VNODE_SEARCH; + } else { + action |= KAUTH_VNODE_EXECUTE; + } + } + } else { + /* take advantage of definition of uflags */ + action = uflags >> 8; + } + +#if CONFIG_MACF + error = mac_vnode_check_access(ctx, vp, uflags); + if (error) + return (error); +#endif /* MAC */ + + /* action == 0 means only check for existence */ + if (action != 0) { + error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx); + } else { + error = 0; + } + + return(error); +} + + + +/* + * access_extended + * + * Description: uap->entries Pointer to argument descriptor + * uap->size Size of the area pointed to by + * the descriptor + * uap->results Pointer to the results array + * + * Returns: 0 Success + * ENOMEM Insufficient memory + * EINVAL Invalid arguments + * namei:EFAULT Bad address + * namei:ENAMETOOLONG Filename too long + * namei:ENOENT No such file or directory + * namei:ELOOP Too many levels of symbolic links + * namei:EBADF Bad file descriptor + * namei:ENOTDIR Not a directory + * namei:??? + * access1: + * + * Implicit returns: + * uap->results Array contents modified + * + * Notes: The uap->entries are structured as an arbitrary length array + * of accessx descriptors, followed by one or more NULL terniated + * strings + * + * struct accessx_descriptor[0] + * ... 
+ * struct accessx_descriptor[n] + * char name_data[0]; + * + * We determine the entry count by walking the buffer containing + * the uap->entries argument descriptor. For each descrptor we + * see, the valid values for the offset ad_name_offset will be + * in the byte range: + * + * [ uap->entries + sizeof(struct accessx_descriptor) ] + * to + * [ uap->entries + uap->size - 2 ] + * + * since we must have at least one string, and the string must + * be at least one character plus the NUL terminator in length. + * + * XXX: Need to support the check-as uid argument + */ +int +access_extended(__unused proc_t p, struct access_extended_args *uap, __unused register_t *retval) +{ + struct accessx_descriptor *input = NULL; + errno_t *result = NULL; + errno_t error = 0; + int wantdelete = 0; + unsigned int desc_max, desc_actual, i, j; + struct vfs_context context; + struct nameidata nd; + int niopts; + vnode_t vp = NULL; + vnode_t dvp = NULL; +#define ACCESSX_MAX_DESCR_ON_STACK 10 + struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK]; + + context.vc_ucred = NULL; + + /* + * Validate parameters; if valid, copy the descriptor array and string + * arguments into local memory. Before proceeding, the following + * conditions must have been met: + * + * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE + * o There must be sufficient room in the request for at least one + * descriptor and a one yte NUL terminated string. + * o The allocation of local storage must not fail. 
+ */ + if (uap->size > ACCESSX_MAX_TABLESIZE) + return(ENOMEM); + if (uap->size < (sizeof(struct accessx_descriptor) + 2)) + return(EINVAL); + if (uap->size <= sizeof (stack_input)) { + input = stack_input; + } else { + MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK); + if (input == NULL) { + error = ENOMEM; + goto out; + } + } + error = copyin(uap->entries, input, uap->size); + if (error) + goto out; + + /* + * Force NUL termination of the copyin buffer to avoid nami() running + * off the end. If the caller passes us bogus data, they may get a + * bogus result. + */ + ((char *)input)[uap->size - 1] = 0; + + /* + * Access is defined as checking against the process' real identity, + * even if operations are checking the effective identity. This + * requires that we use a local vfs context. + */ + context.vc_ucred = kauth_cred_copy_real(kauth_cred_get()); + context.vc_thread = current_thread(); + + /* + * Find out how many entries we have, so we can allocate the result + * array by walking the list and adjusting the count downward by the + * earliest string offset we see. + */ + desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor); + desc_actual = desc_max; + for (i = 0; i < desc_actual; i++) { + /* + * Take the offset to the name string for this entry and + * convert to an input array index, which would be one off + * the end of the array if this entry was the lowest-addressed + * name string. + */ + j = input[i].ad_name_offset / sizeof(struct accessx_descriptor); + + /* + * An offset greater than the max allowable offset is an error. + * It is also an error for any valid entry to point + * to a location prior to the end of the current entry, if + * it's not a reference to the string of the previous entry. 
+ */ + if (j > desc_max || (j != 0 && j <= i)) { + error = EINVAL; + goto out; + } + + /* + * An offset of 0 means use the previous descriptor's offset; + * this is used to chain multiple requests for the same file + * to avoid multiple lookups. + */ + if (j == 0) { + /* This is not valid for the first entry */ + if (i == 0) { + error = EINVAL; + goto out; + } + continue; + } + + /* + * If the offset of the string for this descriptor is before + * what we believe is the current actual last descriptor, + * then we need to adjust our estimate downward; this permits + * the string table following the last descriptor to be out + * of order relative to the descriptor list. + */ + if (j < desc_actual) + desc_actual = j; + } + + /* + * We limit the actual number of descriptors we are willing to process + * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being + * requested does not exceed this limit, + */ + if (desc_actual > ACCESSX_MAX_DESCRIPTORS) { + error = ENOMEM; + goto out; + } + MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK); + if (result == NULL) { + error = ENOMEM; + goto out; + } + + /* + * Do the work by iterating over the descriptor entries we know to + * at least appear to contain valid data. + */ + error = 0; + for (i = 0; i < desc_actual; i++) { + /* + * If the ad_name_offset is 0, then we use the previous + * results to make the check; otherwise, we are looking up + * a new file name. + */ + if (input[i].ad_name_offset != 0) { + /* discard old vnodes */ + if (vp) { + vnode_put(vp); + vp = NULL; + } + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + + /* + * Scan forward in the descriptor list to see if we + * need the parent vnode. We will need it if we are + * deleting, since we must have rights to remove + * entries in the parent directory, as well as the + * rights to delete the object itself. 
+ */ + wantdelete = input[i].ad_flags & _DELETE_OK; + for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) + if (input[j].ad_flags & _DELETE_OK) + wantdelete = 1; + + niopts = FOLLOW | AUDITVNPATH1; + + /* need parent for vnode_authorize for deletion test */ + if (wantdelete) + niopts |= WANTPARENT; + + /* do the lookup */ + NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context); + error = namei(&nd); + if (!error) { + vp = nd.ni_vp; + if (wantdelete) + dvp = nd.ni_dvp; + } + nameidone(&nd); + } + + /* + * Handle lookup errors. + */ + switch(error) { + case ENOENT: + case EACCES: + case EPERM: + case ENOTDIR: + result[i] = error; + break; + case 0: + /* run this access check */ + result[i] = access1(vp, dvp, input[i].ad_flags, &context); + break; + default: + /* fatal lookup error */ + + goto out; + } + } + + /* copy out results */ + error = copyout(result, uap->results, desc_actual * sizeof(errno_t)); + +out: + if (input && input != stack_input) + FREE(input, M_TEMP); + if (result) + FREE(result, M_TEMP); + if (vp) + vnode_put(vp); + if (dvp) + vnode_put(dvp); + if (IS_VALID_CRED(context.vc_ucred)) + kauth_cred_unref(&context.vc_ucred); + return(error); +} + + +/* + * Returns: 0 Success + * namei:EFAULT Bad address + * namei:ENAMETOOLONG Filename too long + * namei:ENOENT No such file or directory + * namei:ELOOP Too many levels of symbolic links + * namei:EBADF Bad file descriptor + * namei:ENOTDIR Not a directory + * namei:??? + * access1: + */ +int +access(__unused proc_t p, struct access_args *uap, __unused register_t *retval) +{ + int error; + struct nameidata nd; + int niopts; + struct vfs_context context; + +#if NAMEDRSRCFORK + int is_namedstream = 0; +#endif + + /* + * Access is defined as checking against the process' + * real identity, even if operations are checking the + * effective identity. So we need to tweak the credential + * in the context. 
+ */ + context.vc_ucred = kauth_cred_copy_real(kauth_cred_get()); + context.vc_thread = current_thread(); + + niopts = FOLLOW | AUDITVNPATH1; + /* need parent for vnode_authorize for deletion test */ + if (uap->flags & _DELETE_OK) + niopts |= WANTPARENT; + NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context); + +#if NAMEDRSRCFORK + /* access(F_OK) calls are allowed for resource forks. */ + if (uap->flags == F_OK) + nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; +#endif + error = namei(&nd); + if (error) + goto out; + +#if NAMEDRSRCFORK + /* Grab reference on the shadow stream file vnode to + * force an inactive on release which will mark it for + * recycle + */ + if (vnode_isnamedstream(nd.ni_vp) && + (nd.ni_vp->v_parent != NULLVP) && + ((nd.ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) { + is_namedstream = 1; + vnode_ref(nd.ni_vp); + } +#endif + + error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context); + +#if NAMEDRSRCFORK + if (is_namedstream) { + vnode_rele(nd.ni_vp); + } +#endif + + vnode_put(nd.ni_vp); + if (uap->flags & _DELETE_OK) + vnode_put(nd.ni_dvp); + nameidone(&nd); + +out: + kauth_cred_unref(&context.vc_ucred); + return(error); +} + + +/* + * Returns: 0 Success + * EFAULT + * copyout:EFAULT + * namei:??? + * vn_stat:??? + */ +static int +stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) +{ + struct stat sb; + struct stat64 sb64; + struct user_stat user_sb; + struct user_stat64 user_sb64; + caddr_t sbp; + int error, my_size; + kauth_filesec_t fsec; + size_t xsecurity_bufsize; + void * statptr; + +#if NAMEDRSRCFORK + int is_namedstream = 0; + /* stat calls are allowed for resource forks. 
*/ + ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; +#endif + error = namei(ndp); + if (error) + return (error); + fsec = KAUTH_FILESEC_NONE; + if (isstat64 != 0) + statptr = (void *)&sb64; + else + statptr = (void *)&sb; + +#if NAMEDRSRCFORK + /* Grab reference on the shadow stream file vnode to + * force an inactive on release which will mark it for + * recycle. + */ + if (vnode_isnamedstream(ndp->ni_vp) && + (ndp->ni_vp->v_parent != NULLVP) && + ((ndp->ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) { + is_namedstream = 1; + vnode_ref (ndp->ni_vp); + } +#endif + + error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx); + +#if NAMEDRSRCFORK + if (is_namedstream) { + vnode_rele (ndp->ni_vp); + } +#endif + + vnode_put(ndp->ni_vp); + nameidone(ndp); + + if (error) + return (error); + /* Zap spare fields */ + if (isstat64 != 0) { + sb64.st_lspare = 0; + sb64.st_qspare[0] = 0LL; + sb64.st_qspare[1] = 0LL; + if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { + munge_stat64(&sb64, &user_sb64); + my_size = sizeof(user_sb64); + sbp = (caddr_t)&user_sb64; + } else { + my_size = sizeof(sb64); + sbp = (caddr_t)&sb64; + } + /* + * Check if we raced (post lookup) against the last unlink of a file. + */ + if ((sb64.st_nlink == 0) && S_ISREG(sb64.st_mode)) { + sb64.st_nlink = 1; + } + } else { + sb.st_lspare = 0; + sb.st_qspare[0] = 0LL; + sb.st_qspare[1] = 0LL; + if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { + munge_stat(&sb, &user_sb); + my_size = sizeof(user_sb); + sbp = (caddr_t)&user_sb; + } else { + my_size = sizeof(sb); + sbp = (caddr_t)&sb; + } + + /* + * Check if we raced (post lookup) against the last unlink of a file. + */ + if ((sb.st_nlink == 0) && S_ISREG(sb.st_mode)) { + sb.st_nlink = 1; + } + } + if ((error = copyout(sbp, ub, my_size)) != 0) + goto out; + + /* caller wants extended security information? */ + if (xsecurity != USER_ADDR_NULL) { + + /* did we get any? 
*/ + if (fsec == KAUTH_FILESEC_NONE) { + if (susize(xsecurity_size, 0) != 0) { + error = EFAULT; + goto out; + } + } else { + /* find the user buffer size */ + xsecurity_bufsize = fusize(xsecurity_size); + + /* copy out the actual data size */ + if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) { + error = EFAULT; + goto out; + } + + /* if the caller supplied enough room, copy out to it */ + if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) + error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec)); + } + } +out: + if (fsec != KAUTH_FILESEC_NONE) + kauth_filesec_free(fsec); + return (error); +} + +/* + * Get file status; this version follows links. + * + * Returns: 0 Success + * stat2:??? [see stat2() in this file] + */ +static int +stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) +{ + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + + NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, path, ctx); + return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); +} + +int +stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused register_t *retval) +{ + return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); +} + +/* + * Returns: 0 Success + * stat1:??? [see stat1() in this file] + */ +int +stat(__unused proc_t p, struct stat_args *uap, __unused register_t *retval) +{ + return(stat1(uap->path, uap->ub, 0, 0, 0)); +} + +int +stat64(__unused proc_t p, struct stat64_args *uap, __unused register_t *retval) +{ + return(stat1(uap->path, uap->ub, 0, 0, 1)); +} + +int +stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused register_t *retval) +{ + return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); +} +/* + * Get file status; this version does not follow links. 
+ */ +static int +lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) +{ + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + + NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1, + UIO_USERSPACE, path, ctx); + + return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); +} + +int +lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused register_t *retval) +{ + return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); +} + +int +lstat(__unused proc_t p, struct lstat_args *uap, __unused register_t *retval) +{ + return(lstat1(uap->path, uap->ub, 0, 0, 0)); +} +int +lstat64(__unused proc_t p, struct lstat64_args *uap, __unused register_t *retval) +{ + return(lstat1(uap->path, uap->ub, 0, 0, 1)); +} + +int +lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused register_t *retval) +{ + return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); +} + +/* + * Get configurable pathname variables. + * + * Returns: 0 Success + * namei:??? + * vn_pathconf:??? + * + * Notes: Global implementation constants are intended to be + * implemented in this function directly; all other constants + * are per-FS implementation, and therefore must be handled in + * each respective FS, instead. + * + * XXX We implement some things globally right now that should actually be + * XXX per-FS; we will need to deal with this at some point. + */ +/* ARGSUSED */ +int +pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval) +{ + int error; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); + + error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx); + + vnode_put(nd.ni_vp); + nameidone(&nd); + return (error); +} + +/* + * Return target name of a symbolic link. 
+ */ +/* ARGSUSED */ +int +readlink(proc_t p, struct readlink_args *uap, register_t *retval) +{ + vnode_t vp; + uio_t auio; + int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + int error; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + char uio_buf[ UIO_SIZEOF(1) ]; + + NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); + vp = nd.ni_vp; + + nameidone(&nd); + + auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->buf, uap->count); + if (vp->v_type != VLNK) + error = EINVAL; + else { +#if CONFIG_MACF + error = mac_vnode_check_readlink(ctx, + vp); +#endif + if (error == 0) + error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx); + if (error == 0) + error = VNOP_READLINK(vp, auio, ctx); + } + vnode_put(vp); + // LP64todo - fix this + *retval = uap->count - (int)uio_resid(auio); + return (error); +} + +/* + * Change file flags. + */ +static int +chflags1(vnode_t vp, int flags, vfs_context_t ctx) +{ + struct vnode_attr va; + kauth_action_t action; + int error; + + VATTR_INIT(&va); + VATTR_SET(&va, va_flags, flags); + +#if CONFIG_MACF + error = mac_vnode_check_setflags(ctx, vp, flags); + if (error) + goto out; +#endif + + /* request authorisation, disregard immutability */ + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + /* + * Request that the auth layer disregard those file flags it's allowed to when + * authorizing this operation; we need to do this in order to be able to + * clear immutable flags. + */ + if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) + goto out; + error = vnode_setattr(vp, &va, ctx); + + if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) { + error = ENOTSUP; + } +out: + vnode_put(vp); + return(error); +} + +/* + * Change flags of a file given a path name. 
+ */ +/* ARGSUSED */ +int +chflags(__unused proc_t p, struct chflags_args *uap, __unused register_t *retval) +{ + vnode_t vp; + vfs_context_t ctx = vfs_context_current(); + int error; + struct nameidata nd; + + AUDIT_ARG(fflags, uap->flags); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); + vp = nd.ni_vp; + nameidone(&nd); + + error = chflags1(vp, uap->flags, ctx); + + return(error); +} + +/* + * Change flags of a file given a file descriptor. + */ +/* ARGSUSED */ +int +fchflags(__unused proc_t p, struct fchflags_args *uap, __unused register_t *retval) +{ + vnode_t vp; + int error; + + AUDIT_ARG(fd, uap->fd); + AUDIT_ARG(fflags, uap->flags); + if ( (error = file_vnode(uap->fd, &vp)) ) + return (error); + + if ((error = vnode_getwithref(vp))) { + file_drop(uap->fd); + return(error); + } + + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + error = chflags1(vp, uap->flags, vfs_context_current()); + + file_drop(uap->fd); + return (error); +} + +/* + * Change security information on a filesystem object. + * + * Returns: 0 Success + * EPERM Operation not permitted + * vnode_authattr:??? [anything vnode_authattr can return] + * vnode_authorize:??? [anything vnode_authorize can return] + * vnode_setattr:??? [anything vnode_setattr can return] + * + * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be + * translated to EPERM before being returned. + */ +static int +chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) +{ + kauth_action_t action; + int error; + + AUDIT_ARG(mode, (mode_t)vap->va_mode); +#warning XXX audit new args + +#if NAMEDSTREAMS + /* chmod calls are not allowed for resource forks. 
*/ + if (vp->v_flag & VISNAMEDSTREAM) { + return (EPERM); + } +#endif + +#if CONFIG_MACF + error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode); + if (error) + return (error); +#endif + + /* make sure that the caller is allowed to set this security information */ + if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) || + ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { + if (error == EACCES) + error = EPERM; + return(error); + } + + error = vnode_setattr(vp, vap, ctx); + + return (error); +} + + +/* + * Change mode of a file given path name. + * + * Returns: 0 Success + * namei:??? [anything namei can return] + * chmod2:??? [anything chmod2 can return] + */ +static int +chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) +{ + struct nameidata nd; + int error; + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, path, ctx); + if ((error = namei(&nd))) + return (error); + error = chmod2(ctx, nd.ni_vp, vap); + vnode_put(nd.ni_vp); + nameidone(&nd); + return(error); +} + +/* + * A chmod system call using an extended argument list compared to the regular + * system call 'mkfifo'. + * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path to object (same as 'chmod') + * uap->uid UID to set + * uap->gid GID to set + * uap->mode File mode to set (same as 'chmod') + * uap->xsecurity ACL to set (or delete) + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'va', if any, is in host byte order. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
+ */ +int +chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused register_t *retval) +{ + int error; + struct vnode_attr va; + kauth_filesec_t xsecdst; + + VATTR_INIT(&va); + if (uap->mode != -1) + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + + xsecdst = NULL; + switch(uap->xsecurity) { + /* explicit remove request */ + case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */ + VATTR_SET(&va, va_acl, NULL); + break; + /* not being set */ + case USER_ADDR_NULL: + break; + default: + if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return(error); + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount); + } + + error = chmod1(vfs_context_current(), uap->path, &va); + + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + return(error); +} + +/* + * Returns: 0 Success + * chmod1:??? [anything chmod1 can return] + */ +int +chmod(__unused proc_t p, struct chmod_args *uap, __unused register_t *retval) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + + return(chmod1(vfs_context_current(), uap->path, &va)); +} + +/* + * Change mode of a file given a file descriptor. 
+ */ +static int +fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap) +{ + vnode_t vp; + int error; + + AUDIT_ARG(fd, fd); + + if ((error = file_vnode(fd, &vp)) != 0) + return (error); + if ((error = vnode_getwithref(vp)) != 0) { + file_drop(fd); + return(error); + } + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + error = chmod2(vfs_context_current(), vp, vap); + (void)vnode_put(vp); + file_drop(fd); + + return (error); +} + +int +fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t *retval) +{ + int error; + struct vnode_attr va; + kauth_filesec_t xsecdst; + + VATTR_INIT(&va); + if (uap->mode != -1) + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + + xsecdst = NULL; + switch(uap->xsecurity) { + case USER_ADDR_NULL: + VATTR_SET(&va, va_acl, NULL); + break; + case CAST_USER_ADDR_T(-1): + break; + default: + if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return(error); + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + } + + error = fchmod1(p, uap->fd, &va); + + + switch(uap->xsecurity) { + case USER_ADDR_NULL: + case CAST_USER_ADDR_T(-1): + break; + default: + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + } + return(error); +} + +int +fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + + return(fchmod1(p, uap->fd, &va)); +} + + +/* + * Set ownership given a path name. + */ +/* ARGSUSED */ +static int +chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, int follow) +{ + vnode_t vp; + struct vnode_attr va; + int error; + struct nameidata nd; + kauth_action_t action; + + AUDIT_ARG(owner, uap->uid, uap->gid); + + NDINIT(&nd, LOOKUP, (follow ? 
FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); + vp = nd.ni_vp; + + nameidone(&nd); + + VATTR_INIT(&va); + if (uap->uid != VNOVAL) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != VNOVAL) + VATTR_SET(&va, va_gid, uap->gid); + +#if CONFIG_MACF + error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid); + if (error) + goto out; +#endif + + /* preflight and authorize attribute changes */ + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) + goto out; + error = vnode_setattr(vp, &va, ctx); + +out: + /* + * EACCES is only allowed from namei(); permissions failure should + * return EPERM, so we need to translate the error code. + */ + if (error == EACCES) + error = EPERM; + + vnode_put(vp); + return (error); +} + +int +chown(__unused proc_t p, struct chown_args *uap, register_t *retval) +{ + return chown1(vfs_context_current(), uap, retval, 1); +} + +int +lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval) +{ + /* Argument list identical, but machine generated; cast for chown1() */ + return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0); +} + +/* + * Set ownership given a file descriptor. 
+ */ +/* ARGSUSED */ +int +fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval) +{ + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); + vnode_t vp; + int error; + kauth_action_t action; + + AUDIT_ARG(owner, uap->uid, uap->gid); + AUDIT_ARG(fd, uap->fd); + + if ( (error = file_vnode(uap->fd, &vp)) ) + return (error); + + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + VATTR_INIT(&va); + if (uap->uid != VNOVAL) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != VNOVAL) + VATTR_SET(&va, va_gid, uap->gid); + +#if NAMEDSTREAMS + /* chown calls are not allowed for resource forks. */ + if (vp->v_flag & VISNAMEDSTREAM) { + error = EPERM; + goto out; + } +#endif + +#if CONFIG_MACF + error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid); + if (error) + goto out; +#endif + + /* preflight and authorize attribute changes */ + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { + if (error == EACCES) + error = EPERM; + goto out; + } + error = vnode_setattr(vp, &va, ctx); + +out: + (void)vnode_put(vp); + file_drop(uap->fd); + return (error); +} + +static int +getutimes(user_addr_t usrtvp, struct timespec *tsp) +{ + struct user_timeval tv[2]; + int error; + + if (usrtvp == USER_ADDR_NULL) { + struct timeval old_tv; + /* XXX Y2038 bug because of microtime argument */ + microtime(&old_tv); + TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]); + tsp[1] = tsp[0]; + } else { + if (IS_64BIT_PROCESS(current_proc())) { + error = copyin(usrtvp, (void *)tv, sizeof(tv)); + } else { + struct timeval old_tv[2]; + error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv)); + tv[0].tv_sec = old_tv[0].tv_sec; + tv[0].tv_usec = old_tv[0].tv_usec; + tv[1].tv_sec = old_tv[1].tv_sec; + tv[1].tv_usec = old_tv[1].tv_usec; + } + if (error) + return (error); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + 
TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); + } + return 0; +} + +static int +setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, + int nullflag) +{ + int error; + struct vnode_attr va; + kauth_action_t action; + + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + VATTR_INIT(&va); + VATTR_SET(&va, va_access_time, ts[0]); + VATTR_SET(&va, va_modify_time, ts[1]); + if (nullflag) + va.va_vaflags |= VA_UTIMES_NULL; + +#if NAMEDSTREAMS + /* utimes calls are not allowed for resource forks. */ + if (vp->v_flag & VISNAMEDSTREAM) { + error = EPERM; + goto out; + } +#endif + +#if CONFIG_MACF + error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]); + if (error) + goto out; +#endif + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) { + if (!nullflag && error == EACCES) + error = EPERM; + goto out; + } + + /* since we may not need to auth anything, check here */ + if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { + if (!nullflag && error == EACCES) + error = EPERM; + goto out; + } + error = vnode_setattr(vp, &va, ctx); + +out: + return error; +} + +/* + * Set the access and modification times of a file. + */ +/* ARGSUSED */ +int +utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval) +{ + struct timespec ts[2]; + user_addr_t usrtvp; + int error; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + + /* + * AUDIT: Needed to change the order of operations to do the + * name lookup first because auditing wants the path. + */ + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); + nameidone(&nd); + + /* + * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch + * the current time instead. 
+ */ + usrtvp = uap->tptr; + if ((error = getutimes(usrtvp, ts)) != 0) + goto out; + + error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL); + +out: + vnode_put(nd.ni_vp); + return (error); +} + +/* + * Set the access and modification times of a file. + */ +/* ARGSUSED */ +int +futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval) +{ + struct timespec ts[2]; + vnode_t vp; + user_addr_t usrtvp; + int error; + + AUDIT_ARG(fd, uap->fd); + usrtvp = uap->tptr; + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + if ((error = file_vnode(uap->fd, &vp)) != 0) + return (error); + if((error = vnode_getwithref(vp))) { + file_drop(uap->fd); + return(error); + } + + error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0); + vnode_put(vp); + file_drop(uap->fd); + return(error); +} + +/* + * Truncate a file given its path name. + */ +/* ARGSUSED */ +int +truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retval) +{ + vnode_t vp; + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); + int error; + struct nameidata nd; + kauth_action_t action; + + if (uap->length < 0) + return(EINVAL); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + if ((error = namei(&nd))) + return (error); + vp = nd.ni_vp; + + nameidone(&nd); + + VATTR_INIT(&va); + VATTR_SET(&va, va_data_size, uap->length); + +#if CONFIG_MACF + error = mac_vnode_check_truncate(ctx, NOCRED, vp); + if (error) + goto out; +#endif + + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) + goto out; + error = vnode_setattr(vp, &va, ctx); +out: + vnode_put(vp); + return (error); +} + +/* + * Truncate a file given a file descriptor. 
+ */ +/* ARGSUSED */ +int +ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval) +{ + vfs_context_t ctx = vfs_context_current(); + struct vnode_attr va; + vnode_t vp; + struct fileproc *fp; + int error ; + int fd = uap->fd; + + AUDIT_ARG(fd, uap->fd); + if (uap->length < 0) + return(EINVAL); + + if ( (error = fp_lookup(p,fd,&fp,0)) ) { + return(error); + } + + if (fp->f_fglob->fg_type == DTYPE_PSXSHM) { + error = pshm_truncate(p, fp, uap->fd, uap->length, retval); + goto out; + } + if (fp->f_fglob->fg_type != DTYPE_VNODE) { + error = EINVAL; + goto out; + } + + vp = (vnode_t)fp->f_fglob->fg_data; + + if ((fp->f_fglob->fg_flag & FWRITE) == 0) { + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + error = EINVAL; + goto out; + } + + if ((error = vnode_getwithref(vp)) != 0) { + goto out; + } + + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + +#if CONFIG_MACF + error = mac_vnode_check_truncate(ctx, + fp->f_fglob->fg_cred, vp); + if (error) { + (void)vnode_put(vp); + goto out; + } +#endif + VATTR_INIT(&va); + VATTR_SET(&va, va_data_size, uap->length); + error = vnode_setattr(vp, &va, ctx); + (void)vnode_put(vp); +out: + file_drop(fd); + return (error); +} + + +/* + * Sync an open file. + */ +/* ARGSUSED */ +int +fsync(proc_t p, struct fsync_args *uap, register_t *retval) +{ + __pthread_testcancel(1); + return(fsync_nocancel(p, (struct fsync_nocancel_args *)uap, retval)); +} + +int +fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *retval) +{ + vnode_t vp; + struct fileproc *fp; + vfs_context_t ctx = vfs_context_current(); + int error; + + if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) ) + return (error); + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + + error = VNOP_FSYNC(vp, MNT_WAIT, ctx); + +#if NAMEDRSRCFORK + /* Sync resource fork shadow file if necessary. 
*/ + if ((error == 0) && + (vp->v_flag & VISNAMEDSTREAM) && + (vp->v_parent != NULLVP) && + !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) && + (fp->f_flags & FP_WRITTEN)) { + (void) vnode_flushnamedstream(vp->v_parent, vp, ctx); + } +#endif + + (void)vnode_put(vp); + file_drop(uap->fd); + return (error); +} + +/* + * Duplicate files. Source must be a file, target must be a file or + * must not exist. + * + * XXX Copyfile authorisation checking is woefully inadequate, and will not + * perform inheritance correctly. + */ +/* ARGSUSED */ +int +copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retval) +{ + vnode_t tvp, fvp, tdvp, sdvp; + struct nameidata fromnd, tond; + int error; + vfs_context_t ctx = vfs_context_current(); + + /* Check that the flags are valid. */ + + if (uap->flags & ~CPF_MASK) { + return(EINVAL); + } + + NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1, + UIO_USERSPACE, uap->from, ctx); + if ((error = namei(&fromnd))) + return (error); + fvp = fromnd.ni_vp; + + NDINIT(&tond, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK, + UIO_USERSPACE, uap->to, ctx); + if ((error = namei(&tond))) { + goto out1; + } + tdvp = tond.ni_dvp; tvp = tond.ni_vp; + if (tvp != NULL) { if (!(uap->flags & CPF_OVERWRITE)) { error = EEXIST; goto out; } } - if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) { error = EISDIR; goto out; } - if (error = VOP_ACCESS(tdvp, VWRITE, cred, p)) + if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) goto out; if (fvp == tdvp) @@ -2229,1006 +4663,1352 @@ copyfile(p, uap, retval) */ if (fvp == tvp) error = -1; + if (!error) + error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx); out: - if (!error) { - error = VOP_COPYFILE(fvp,tdvp,tvp,&tond.ni_cnd,uap->mode,uap->flags); - } else { - VOP_ABORTOP(tdvp, &tond.ni_cnd); - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - vrele(fvp); - 
} - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); + sdvp = tond.ni_startdir; + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&tond); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + vnode_put(sdvp); out1: + vnode_put(fvp); + if (fromnd.ni_startdir) - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, fromnd.ni_cnd.cn_pnlen, M_NAMEI); + vnode_put(fromnd.ni_startdir); + nameidone(&fromnd); + if (error == -1) return (0); return (error); } + /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ -struct rename_args { - char *from; - char *to; -}; /* ARGSUSED */ int -rename(p, uap, retval) - struct proc *p; - register struct rename_args *uap; - register_t *retval; +rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) { - register struct vnode *tvp, *fvp, *tdvp; + vnode_t tvp, tdvp; + vnode_t fvp, fdvp; struct nameidata fromnd, tond; + vfs_context_t ctx = vfs_context_current(); int error; + int do_retry; int mntrename; - int casesense,casepres; - + int need_event; + const char *oname; + char *from_name = NULL, *to_name = NULL; + int from_len, to_len; + int holding_mntlock; + mount_t locked_mp = NULL; + vnode_t oparent; + fse_info from_finfo, to_finfo; + + holding_mntlock = 0; + do_retry = 0; +retry: + fvp = tvp = NULL; + fdvp = tdvp = NULL; mntrename = FALSE; - NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, - uap->from, p); - if (error = namei(&fromnd)) - return (error); - fvp = fromnd.ni_vp; + NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx); + + if ( (error = namei(&fromnd)) ) + goto out1; + fdvp = fromnd.ni_dvp; + fvp = fromnd.ni_vp; - NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, - UIO_USERSPACE, uap->to, p); - if (error = 
namei(&tond)) { - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out2; +#if CONFIG_MACF + error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd); + if (error) + goto out1; +#endif + + NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx); + if (fvp->v_type == VDIR) + tond.ni_cnd.cn_flags |= WILLBEDIR; + + if ( (error = namei(&tond)) ) { + /* + * Translate error code for rename("dir1", "dir2/."). + */ + if (error == EISDIR && fvp->v_type == VDIR) + error = EINVAL; + goto out1; } tdvp = tond.ni_dvp; - tvp = tond.ni_vp; + tvp = tond.ni_vp; + +#if CONFIG_MACF + error = mac_vnode_check_rename_to(ctx, + tdvp, tvp, fdvp == tdvp, &tond.ni_cnd); + if (error) + goto out1; +#endif if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; - goto out; + goto out1; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; - goto out; + goto out1; + } + } + if (fvp == tdvp) { + error = EINVAL; + goto out1; + } + /* + * If the source and destination are the same (i.e. they're + * links to the same vnode) and the target file system is + * case sensitive, then there is nothing to do. + */ + if (fvp == tvp) { + int pathconf_val; + + /* + * Note: if _PC_CASE_SENSITIVE selector isn't supported, + * then assume that this file system is case sensitive. + */ + if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 || + pathconf_val != 0) { + goto out1; + } + } + + /* + * Authorization. + * + * If tvp is a directory and not the same as fdvp, or tdvp is not + * the same as fdvp, the node is moving between directories and we + * need rights to remove from the old and add to the new. + * + * If tvp already exists and is not a directory, we need to be + * allowed to delete it. + * + * Note that we do not inherit when renaming. 
+ * + * XXX This needs to be revisited to implement the deferred-inherit bit + */ + { + int moving = 0; + + error = 0; + if ((tvp != NULL) && vnode_isdir(tvp)) { + if (tvp != fdvp) + moving = 1; + } else if (tdvp != fdvp) { + moving = 1; + } + /* + * must have delete rights to remove the old name even in + * the simple case of fdvp == tdvp. + * + * If fvp is a directory, and we are changing it's parent, + * then we also need rights to rewrite its ".." entry as well. + */ + if (vnode_isdir(fvp)) { + if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0) + goto auth_exit; + } else { + if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0) + goto auth_exit; + } + if (moving) { + /* moving into tdvp or tvp, must have rights to add */ + if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp, + NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, + ctx)) != 0) { + /* + * We could encounter a race where after doing the namei, tvp stops + * being valid. If so, simply re-drive the rename call from the + * top. + */ + if (error == ENOENT) { + do_retry = 1; + } + goto auth_exit; + } + } else { + /* node staying in same directory, must be allowed to add new name */ + if ((error = vnode_authorize(fdvp, NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0) + goto auth_exit; + } + /* overwriting tvp */ + if ((tvp != NULL) && !vnode_isdir(tvp) && + ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) { + /* + * We could encounter a race where after doing the namei, tvp stops + * being valid. If so, simply re-drive the rename call from the + * top. + */ + if (error == ENOENT) { + do_retry = 1; + } + goto auth_exit; + } + + /* XXX more checks? */ + +auth_exit: + /* authorization denied */ + if (error != 0) + goto out1; + } + /* + * Allow the renaming of mount points. 
+ * - target must not exist + * - target must reside in the same directory as source + * - union mounts cannot be renamed + * - "/" cannot be renamed + */ + if ((fvp->v_flag & VROOT) && + (fvp->v_type == VDIR) && + (tvp == NULL) && + (fvp->v_mountedhere == NULL) && + (fdvp == tdvp) && + ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) && + (fvp->v_mount->mnt_vnodecovered != NULLVP)) { + vnode_t coveredvp; + + /* switch fvp to the covered vnode */ + coveredvp = fvp->v_mount->mnt_vnodecovered; + if ( (vnode_getwithref(coveredvp)) ) { + error = ENOENT; + goto out1; + } + vnode_put(fvp); + + fvp = coveredvp; + mntrename = TRUE; + } + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out1; + } + /* + * Avoid renaming "." and "..". + */ + if (fvp->v_type == VDIR && + ((fdvp == fvp) || + (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') || + ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) { + error = EINVAL; + goto out1; + } + /* + * The following edge case is caught here: + * (to cannot be a descendent of from) + * + * o fdvp + * / + * / + * o fvp + * \ + * \ + * o tdvp + * / + * / + * o tvp + */ + if (tdvp->v_parent == fvp) { + error = EINVAL; + goto out1; + } + + /* + * If source is the same as the destination (that is the + * same inode number) then there is nothing to do... + * EXCEPT if the underlying file system supports case + * insensitivity and is case preserving. In this case + * the file system needs to handle the special case of + * getting the same vnode as target (fvp) and source (tvp). + * + * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE + * and _PC_CASE_PRESERVING can have this exception, and they need to + * handle the special case of getting the same vnode as target and + * source. NOTE: Then the target is unlocked going into vnop_rename, + * so not to cause locking problems. 
There is a single reference on tvp. + * + * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE + * that correct behaviour then is just to remove the source (link) + */ + if (fvp == tvp && fdvp == tdvp) { + if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) { + goto out1; + } + } + + if (holding_mntlock && fvp->v_mount != locked_mp) { + /* + * we're holding a reference and lock + * on locked_mp, but it no longer matches + * what we want to do... so drop our hold + */ + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp != fdvp && fvp->v_type == VDIR) { + /* + * serialize renames that re-shape + * the tree... if holding_mntlock is + * set, then we're ready to go... + * otherwise we + * first need to drop the iocounts + * we picked up, second take the + * lock to serialize the access, + * then finally start the lookup + * process over with the lock held + */ + if (!holding_mntlock) { + /* + * need to grab a reference on + * the mount point before we + * drop all the iocounts... once + * the iocounts are gone, the mount + * could follow + */ + locked_mp = fvp->v_mount; + mount_ref(locked_mp, 0); + + /* + * nameidone has to happen before we vnode_put(tvp) + * since it may need to release the fs_nodelock on the tvp + */ + nameidone(&tond); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fvp + */ + nameidone(&fromnd); + + vnode_put(fvp); + vnode_put(fdvp); + + mount_lock_renames(locked_mp); + holding_mntlock = 1; + + goto retry; + } + } else { + /* + * when we dropped the iocounts to take + * the lock, we allowed the identity of + * the various vnodes to change... if they did, + * we may no longer be dealing with a rename + * that reshapes the tree... 
once we're holding + * the iocounts, the vnodes can't change type + * so we're free to drop the lock at this point + * and continue on + */ + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + } + // save these off so we can later verify that fvp is the same + oname = fvp->v_name; + oparent = fvp->v_parent; + +#if CONFIG_FSE + need_event = need_fsevent(FSE_RENAME, fvp); + if (need_event) { + get_fse_info(fvp, &from_finfo, ctx); + + if (tvp) { + get_fse_info(tvp, &to_finfo, ctx); + } + } +#else + need_event = 0; +#endif /* CONFIG_FSE */ + + if (need_event || kauth_authorize_fileop_has_listeners()) { + GET_PATH(from_name); + if (from_name == NULL) { + error = ENOMEM; + goto out1; + } + from_len = MAXPATHLEN; + vn_getpath(fdvp, from_name, &from_len); + if ((from_len + 1 + fromnd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { + if (from_len > 2) { + from_name[from_len-1] = '/'; + } else { + from_len--; + } + strlcpy(&from_name[from_len], fromnd.ni_cnd.cn_nameptr, MAXPATHLEN-from_len); + from_len += fromnd.ni_cnd.cn_namelen + 1; + from_name[from_len] = '\0'; + } + + GET_PATH(to_name); + if (to_name == NULL) { + error = ENOMEM; + goto out1; + } + + to_len = MAXPATHLEN; + vn_getpath(tdvp, to_name, &to_len); + // if the path is not just "/", then append a "/" + if ((to_len + 1 + tond.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { + if (to_len > 2) { + to_name[to_len-1] = '/'; + } else { + to_len--; + } + strlcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr, MAXPATHLEN-to_len); + to_len += tond.ni_cnd.cn_namelen + 1; + to_name[to_len] = '\0'; + } + } + + error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd, + tdvp, tvp, &tond.ni_cnd, + ctx); + + if (holding_mntlock) { + /* + * we can drop our serialization + * lock now + */ + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (error) { + /* + * We may encounter a race in the VNOP where the destination didn't + * exist when we did the namei, but 
it does by the time we go and + * try to create the entry. In this case, we should re-drive this rename + * call from the top again. + */ + if (error == EEXIST) { + do_retry = 1; + } + + goto out1; + } + + /* call out to allow 3rd party notification of rename. + * Ignore result of kauth_authorize_fileop call. + */ + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_RENAME, + (uintptr_t)from_name, (uintptr_t)to_name); + +#if CONFIG_FSE + if (from_name != NULL && to_name != NULL) { + if (tvp) { + add_fsevent(FSE_RENAME, ctx, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_FINFO, &to_finfo, + FSE_ARG_DONE); + } else { + add_fsevent(FSE_RENAME, ctx, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_DONE); + } + } +#endif /* CONFIG_FSE */ + + /* + * update filesystem's mount point data + */ + if (mntrename) { + char *cp, *pathend, *mpname; + char * tobuf; + struct mount *mp; + int maxlen; + size_t len = 0; + + mp = fvp->v_mountedhere; + + if (vfs_busy(mp, LK_NOWAIT)) { + error = EBUSY; + goto out1; } + MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + + error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len); + if (!error) { + /* find current mount point prefix */ + pathend = &mp->mnt_vfsstat.f_mntonname[0]; + for (cp = pathend; *cp != '\0'; ++cp) { + if (*cp == '/') + pathend = cp + 1; + } + /* find last component of target name */ + for (mpname = cp = tobuf; *cp != '\0'; ++cp) { + if (*cp == '/') + mpname = cp + 1; + } + /* append name to prefix */ + maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname); + bzero(pathend, maxlen); + strlcpy(pathend, mpname, maxlen); + } + FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI); + + vfs_unbusy(mp); } - if (fvp == tdvp) - error = EINVAL; /* - * If source is the same as the destination (that is the - * same inode number) then there is nothing to do... 
- * EXCEPT if the - * underlyning file system supports case insensitivity and is case preserving. Then - * a special case is made, i.e. foo -> Foo. - * - * Only file systems that support the pathconf selectors _PC_CASE_SENSITIVE and - * _PC_CASE_PRESERVING can have this exception, and then they would need to - * handle the special case of getting the same vnode as target and source. - * NOTE: Then the target is unlocked going into VOP_RENAME, so not to cause - * locking problems. There is a single reference on tvp. + * fix up name & parent pointers. note that we first + * check that fvp has the same name/parent pointers it + * had before the rename call... this is a 'weak' check + * at best... */ - if (fvp == tvp) { - error = -1; - /* - * Check to see if just changing case, if: - * - file system is case insensitive - * - and also case preserving - * _ same parent directories (so changing case by different links is not supported) - * For instance: mv a/foo a/Foo - */ - if ((tond.ni_dvp == fromnd.ni_dvp) && - (VOP_PATHCONF(tdvp, _PC_CASE_SENSITIVE, &casesense) == 0) && - (VOP_PATHCONF(tdvp, _PC_CASE_PRESERVING, &casepres) == 0) && - (casesense == 0) && - (casepres == 1)) { - /* Since the target is locked...unlock it and lose a ref */ - vput(tvp); - error = 0; - } + if (oname == fvp->v_name && oparent == fvp->v_parent) { + int update_flags; + + update_flags = VNODE_UPDATE_NAME; + + if (fdvp != tdvp) + update_flags |= VNODE_UPDATE_PARENT; + + vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags); } - - /* - * Allow the renaming of mount points. 
- * - target must not exist - * - target must reside in the same directory as source - * - union mounts cannot be renamed - * - "/" cannot be renamed - */ - if ((fvp->v_flag & VROOT) && - (fvp->v_type == VDIR) && - (tvp == NULL) && - (fvp->v_mountedhere == NULL) && - (fromnd.ni_dvp == tond.ni_dvp) && - ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) && - (fvp->v_mount->mnt_vnodecovered != NULLVP)) { - - /* switch fvp to the covered vnode */ - fromnd.ni_vp = fvp->v_mount->mnt_vnodecovered; - vrele(fvp); - fvp = fromnd.ni_vp; - VREF(fvp); - mntrename = TRUE; +out1: + if (to_name != NULL) { + RELEASE_PATH(to_name); + to_name = NULL; } -out: - if (!error) { - VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); - if (fromnd.ni_dvp != tdvp) - VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - if (tvp) - VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_RENAME(fromnd.ni_dvp, fvp, &fromnd.ni_cnd, - tond.ni_dvp, tvp, &tond.ni_cnd); - if (error) - goto out1; - + if (from_name != NULL) { + RELEASE_PATH(from_name); + from_name = NULL; + } + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp) { /* - * update filesystem's mount point data + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp */ - if (mntrename) { - char *cp, *pathend, *mpname; - char * tobuf; - struct mount *mp; - int maxlen; - size_t len = 0; - - VREF(fvp); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); - mp = fvp->v_mountedhere; - - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - vput(fvp); - error = EBUSY; - goto out1; - } - VOP_UNLOCK(fvp, 0, p); - - MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len); - if (!error) { - /* find current mount point prefix */ - pathend = &mp->mnt_stat.f_mntonname[0]; - for (cp = pathend; *cp != '\0'; ++cp) { - if (*cp == '/') - pathend = cp + 1; - } - /* find last component of 
target name */ - for (mpname = cp = tobuf; *cp != '\0'; ++cp) { - if (*cp == '/') - mpname = cp + 1; - } - /* append name to prefix */ - maxlen = MNAMELEN - (pathend - mp->mnt_stat.f_mntonname); - bzero(pathend, maxlen); - strncpy(pathend, mpname, maxlen - 1); - } - FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI); + nameidone(&tond); - vrele(fvp); - vfs_unbusy(mp, p); - } - } else { - VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); if (tvp) - vput(tvp); - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + vnode_put(tvp); + vnode_put(tdvp); } -out1: - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); -out2: - if (fromnd.ni_startdir) - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, fromnd.ni_cnd.cn_pnlen, M_NAMEI); - if (error == -1) - return (0); + if (fdvp) { + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromnd); + + if (fvp) + vnode_put(fvp); + vnode_put(fdvp); + } + + /* + * If things changed after we did the namei, then we will re-drive + * this rename call from the top. + */ + if(do_retry) { + do_retry = 0; + goto retry; + } + return (error); } /* * Make a directory file. + * + * Returns: 0 Success + * EEXIST + * namei:??? + * vnode_authorize:??? + * vn_create:??? 
*/ -struct mkdir_args { - char *path; - int mode; -}; /* ARGSUSED */ -int -mkdir(p, uap, retval) - struct proc *p; - register struct mkdir_args *uap; - register_t *retval; +static int +mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) { - register struct vnode *vp; - struct vattr vattr; + vnode_t vp, dvp; int error; + int update_flags = 0; struct nameidata nd; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + AUDIT_ARG(mode, vap->va_mode); + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, path, ctx); + nd.ni_cnd.cn_flags |= WILLBEDIR; + error = namei(&nd); + if (error) return (error); + dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); - return (EEXIST); - } - VATTR_NULL(&vattr); - vattr.va_type = VDIR; - vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - if (!error) - vput(nd.ni_vp); + + if (vp != NULL) { + error = EEXIST; + goto out; + } + + VATTR_SET(vap, va_type, VDIR); + +#if CONFIG_MACF + error = mac_vnode_check_create(ctx, + nd.ni_dvp, &nd.ni_cnd, vap); + if (error) + goto out; +#endif + + /* authorize addition of a directory to the parent */ + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0) + goto out; + + + /* make the directory */ + if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0) + goto out; + + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); + +#if CONFIG_FSE + add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, 
FSE_ARG_DONE); +#endif + +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + return (error); } + +int +mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *retval) +{ + int ciferror; + kauth_filesec_t xsecdst; + struct vnode_attr va; + + xsecdst = NULL; + if ((uap->xsecurity != USER_ADDR_NULL) && + ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) + return ciferror; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + if (xsecdst != NULL) + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + + ciferror = mkdir1(vfs_context_current(), uap->path, &va); + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + return ciferror; +} + +int +mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + + return(mkdir1(vfs_context_current(), uap->path, &va)); +} + /* * Remove a directory file. */ -struct rmdir_args { - char *path; -}; /* ARGSUSED */ int -rmdir(p, uap, retval) - struct proc *p; - struct rmdir_args *uap; - register_t *retval; +rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval) { - register struct vnode *vp; + vnode_t vp, dvp; int error; struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) - return (error); - vp = nd.ni_vp; - if (vp->v_type != VDIR) { - error = ENOTDIR; - goto out; - } - /* - * No rmdir "." please. - */ - if (nd.ni_dvp == vp) { - error = EINVAL; - goto out; - } - /* - * The root of a mounted filesystem cannot be deleted. 
+ int restart_flag, oldvp_id = -1; + + /* + * This loop exists to restart rmdir in the unlikely case that two + * processes are simultaneously trying to remove the same directory + * containing orphaned appleDouble files. */ - if (vp->v_flag & VROOT) - error = EBUSY; -out: - if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); - } - return (error); -} + do { + restart_flag = 0; -#if COMPAT_43 -/* - * Read a block of directory entries in a file system independent format. - */ -struct ogetdirentries_args { - int fd; - char *buf; - u_int count; - long *basep; -}; -int -ogetdirentries(p, uap, retval) - struct proc *p; - register struct ogetdirentries_args *uap; - register_t *retval; -{ - register struct vnode *vp; - struct file *fp; - struct uio auio, kuio; - struct iovec aiov, kiov; - struct dirent *dp, *edp; - caddr_t dirbuf; - int error, eofflag, readcnt; - long loff; + NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) + return (error); - if (error = getvnode(p, uap->fd, &fp)) - return (error); - if ((fp->f_flag & FREAD) == 0) - return (EBADF); - vp = (struct vnode *)fp->f_data; -unionread: - if (vp->v_type != VDIR) - return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->count; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - loff = auio.uio_offset = fp->f_offset; -# if (BYTE_ORDER != LITTLE_ENDIAN) - if (vp->v_mount->mnt_maxsymlinklen <= 0) { - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, - (int *)0, (u_long *)0); - fp->f_offset = auio.uio_offset; - } else -# endif - { - kuio = auio; - 
kuio.uio_iov = &kiov; - kuio.uio_segflg = UIO_SYSSPACE; - kiov.iov_len = uap->count; - MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); - kiov.iov_base = dirbuf; - error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, - (int *)0, (u_long *)0); - fp->f_offset = kuio.uio_offset; - if (error == 0) { - readcnt = uap->count - kuio.uio_resid; - edp = (struct dirent *)&dirbuf[readcnt]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - /* - * The expected low byte of - * dp->d_namlen is our dp->d_type. - * The high MBZ byte of dp->d_namlen - * is our dp->d_namlen. - */ - dp->d_type = dp->d_namlen; - dp->d_namlen = 0; -# else - /* - * The dp->d_type is the high byte - * of the expected dp->d_namlen, - * so must be zero'ed. - */ - dp->d_type = 0; -# endif - if (dp->d_reclen > 0) { - dp = (struct dirent *) - ((char *)dp + dp->d_reclen); - } else { - error = EIO; - break; + dvp = nd.ni_dvp; + vp = nd.ni_vp; + + + /* + * If being restarted check if the new vp + * still has the same v_id. + */ + if (oldvp_id != -1 && oldvp_id != vp->v_id) { + error = ENOENT; + goto out; + } + + if (vp->v_type != VDIR) { + /* + * rmdir only deals with directories + */ + error = ENOTDIR; + } else if (dvp == vp) { + /* + * No rmdir "." please. + */ + error = EINVAL; + } else if (vp->v_flag & VROOT) { + /* + * The root of a mounted filesystem cannot be deleted. 
+ */ + error = EBUSY; + } else { +#if CONFIG_MACF + error = mac_vnode_check_unlink(ctx, dvp, + vp, &nd.ni_cnd); + if (!error) +#endif + error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx); + } + if (!error) { + char *path = NULL; + int len; + fse_info finfo; + int has_listeners = 0; + int need_event = 0; + +#if CONFIG_FSE + need_event = need_fsevent(FSE_DELETE, dvp); + if (need_event) { + get_fse_info(vp, &finfo, ctx); + } +#endif + has_listeners = kauth_authorize_fileop_has_listeners(); + if (need_event || has_listeners) { + GET_PATH(path); + if (path == NULL) { + error = ENOMEM; + goto out; } + len = MAXPATHLEN; + vn_getpath(vp, path, &len); } - if (dp >= edp) - error = uiomove(dirbuf, readcnt, &auio); - } - FREE(dirbuf, M_TEMP); - } - VOP_UNLOCK(vp, 0, p); - if (error) - return (error); -#if UNION -{ - extern int (**union_vnodeop_p)(void *); - extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx); + + /* + * Special case to remove orphaned AppleDouble + * files. I don't like putting this in the kernel, + * but carbon does not like putting this in carbon either, + * so here we are. + */ + if (error == ENOTEMPTY) { + error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag); + if (error == EBUSY) { + oldvp_id = vp->v_id; + goto out; + } - if ((uap->count == auio.uio_resid) && - (vp->v_op == union_vnodeop_p)) { - struct vnode *lvp; - lvp = union_dircache(vp, p); - if (lvp != NULLVP) { - struct vattr va; + /* + * Assuming everything went well, we will try the RMDIR again + */ + if (!error) + error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx); + } /* - * If the directory is opaque, - * then don't show lower entries + * Call out to allow 3rd party notification of delete. + * Ignore result of kauth_authorize_fileop call. 
*/ - error = VOP_GETATTR(vp, &va, fp->f_cred, p); - if (va.va_flags & OPAQUE) { - vput(lvp); - lvp = NULL; + if (!error) { + if (has_listeners) { + kauth_authorize_fileop(vfs_context_ucred(ctx), + KAUTH_FILEOP_DELETE, + (uintptr_t)vp, + (uintptr_t)path); + } + + if (vp->v_flag & VISHARDLINK) { + // see the comment in unlink1() about why we update + // the parent of a hard link when it is removed + vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT); + } + +#if CONFIG_FSE + if (need_event) { + add_fsevent(FSE_DELETE, ctx, + FSE_ARG_STRING, len, path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + } +#endif } + if (path != NULL) + RELEASE_PATH(path); } - - if (lvp != NULLVP) { - error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); - if (error) { - vput(lvp); - return (error); - } - VOP_UNLOCK(lvp, 0, p); - fp->f_data = (caddr_t) lvp; - fp->f_offset = 0; - error = VOP_CLOSE(vp, FREAD, fp->f_cred, p); - vrele(vp); - if (error) - return (error); - vp = lvp; - goto unionread; + +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + vnode_put(dvp); + vnode_put(vp); + + if (restart_flag == 0) { + wakeup_one((caddr_t)vp); + return (error); } - } + tsleep(vp, PVFS, "rm AD", 1); + + } while (restart_flag != 0); + + return (error); + } -#endif /* UNION */ - if ((uap->count == auio.uio_resid) && - (vp->v_flag & VROOT) && - (vp->v_mount->mnt_flag & MNT_UNION)) { - struct vnode *tvp = vp; - vp = vp->v_mount->mnt_vnodecovered; - VREF(vp); - fp->f_data = (caddr_t) vp; - fp->f_offset = 0; - vrele(tvp); - goto unionread; +/* Get direntry length padded to 8 byte alignment */ +#define DIRENT64_LEN(namlen) \ + ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7) + +static errno_t +vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, + int *numdirent, vfs_context_t ctxp) +{ + /* Check if fs natively supports VNODE_READDIR_EXTENDED */ + if 
(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) { + return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp); + } else { + size_t bufsize; + void * bufptr; + uio_t auio; + struct direntry entry64; + struct dirent *dep; + int bytesread; + int error; + + /* + * Our kernel buffer needs to be smaller since re-packing + * will expand each dirent. The worse case (when the name + * length is 3) corresponds to a struct direntry size of 32 + * bytes (8-byte aligned) and a struct dirent size of 12 bytes + * (4-byte aligned). So having a buffer that is 3/8 the size + * will prevent us from reading more than we can pack. + * + * Since this buffer is wired memory, we will limit the + * buffer size to a maximum of 32K. We would really like to + * use 32K in the MIN(), but we use magic number 87371 to + * prevent uio_resid() * 3 / 8 from overflowing. + */ + bufsize = 3 * MIN(uio_resid(uio), 87371) / 8; + MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK); + + auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + uio_addiov(auio, (uintptr_t)bufptr, bufsize); + auio->uio_offset = uio->uio_offset; + + error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp); + + dep = (struct dirent *)bufptr; + bytesread = bufsize - uio_resid(auio); + + /* + * Convert all the entries and copy them out to user's buffer. + */ + while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) { + /* Convert a dirent to a dirent64. */ + entry64.d_ino = dep->d_ino; + entry64.d_seekoff = 0; + entry64.d_reclen = DIRENT64_LEN(dep->d_namlen); + entry64.d_namlen = dep->d_namlen; + entry64.d_type = dep->d_type; + bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1); + + /* Move to next entry. */ + dep = (struct dirent *)((char *)dep + dep->d_reclen); + + /* Copy entry64 to user's buffer. */ + error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio); + } + + /* Update the real offset using the offset we got from VNOP_READDIR. 
*/ + if (error == 0) { + uio->uio_offset = auio->uio_offset; + } + uio_free(auio); + FREE(bufptr, M_TEMP); + return (error); } - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, - sizeof(long)); - *retval = uap->count - auio.uio_resid; - return (error); } -#endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. */ -struct getdirentries_args { - int fd; - char *buf; - u_int count; - long *basep; -}; -int -getdirentries(p, uap, retval) - struct proc *p; - register struct getdirentries_args *uap; - register_t *retval; +static int +getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread, + off_t *offset, int flags) { - register struct vnode *vp; - struct file *fp; - struct uio auio; - struct iovec aiov; - long loff; - int error, eofflag; - - if (error = getvnode(p, uap->fd, &fp)) + vnode_t vp; + struct vfs_context context = *vfs_context_current(); /* local copy */ + struct fileproc *fp; + uio_t auio; + int spacetype = proc_is64bit(vfs_context_proc(&context)) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; + off_t loff; + int error, eofflag, numdirent; + char uio_buf[ UIO_SIZEOF(1) ]; + + error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp); + if (error) { return (error); - if ((fp->f_flag & FREAD) == 0) - return (EBADF); - vp = (struct vnode *)fp->f_data; -unionread: - if (vp->v_type != VDIR) - return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->count; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - loff = auio.uio_offset = fp->f_offset; - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, - (int *)0, (u_long *)0); - fp->f_offset = auio.uio_offset; - VOP_UNLOCK(vp, 0, p); + } + if ((fp->f_fglob->fg_flag & FREAD) == 0) { + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + error = EBADF; + goto out; + } + +#if CONFIG_MACF + error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob); if (error) - return (error); + goto out; +#endif + if ( (error = vnode_getwithref(vp)) ) { + goto out; + } + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + +unionread: + if (vp->v_type != VDIR) { + (void)vnode_put(vp); + error = EINVAL; + goto out; + } + +#if CONFIG_MACF + error = mac_vnode_check_readdir(&context, vp); + if (error != 0) { + (void)vnode_put(vp); + goto out; + } +#endif /* MAC */ + + loff = fp->f_fglob->fg_offset; + auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, bufp, bufsize); + + if (flags & VNODE_READDIR_EXTENDED) { + error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context); + fp->f_fglob->fg_offset = uio_offset(auio); + } else { + error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context); + fp->f_fglob->fg_offset = uio_offset(auio); + } + if (error) { + (void)vnode_put(vp); + goto out; + } + + if ((user_ssize_t)bufsize == uio_resid(auio)){ + if (union_dircheckp) { + 
error = union_dircheckp(&vp, fp, &context); + if (error == -1) + goto unionread; + if (error) + goto out; + } + + if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + vnode_getwithref(vp); + vnode_ref(vp); + fp->f_fglob->fg_data = (caddr_t) vp; + fp->f_fglob->fg_offset = 0; + vnode_rele(tvp); + vnode_put(tvp); + goto unionread; + } + } -#if UNION -{ - extern int (**union_vnodeop_p)(void *); - extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + vnode_put(vp); + if (offset) { + *offset = loff; + } + // LP64todo - fix this + *bytesread = bufsize - uio_resid(auio); +out: + file_drop(fd); + return (error); +} - if ((uap->count == auio.uio_resid) && - (vp->v_op == union_vnodeop_p)) { - struct vnode *lvp; - lvp = union_dircache(vp, p); - if (lvp != NULLVP) { - struct vattr va; +int +getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_t *retval) +{ + off_t offset; + long loff; + ssize_t bytesread; + int error; - /* - * If the directory is opaque, - * then don't show lower entries - */ - error = VOP_GETATTR(vp, &va, fp->f_cred, p); - if (va.va_flags & OPAQUE) { - vput(lvp); - lvp = NULL; - } - } + AUDIT_ARG(fd, uap->fd); + error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0); - if (lvp != NULLVP) { - error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); - if (error) { - vput(lvp); - return (error); - } - VOP_UNLOCK(lvp, 0, p); - fp->f_data = (caddr_t) lvp; - fp->f_offset = 0; - error = VOP_CLOSE(vp, FREAD, fp->f_cred, p); - vrele(vp); - if (error) - return (error); - vp = lvp; - goto unionread; - } + if (error == 0) { + loff = (long)offset; + error = copyout((caddr_t)&loff, uap->basep, sizeof(long)); + *retval = bytesread; } + return (error); } -#endif /* UNION */ - if ((uap->count == auio.uio_resid) && - (vp->v_flag & VROOT) && - (vp->v_mount->mnt_flag & MNT_UNION)) { - struct vnode *tvp = vp; - vp = 
vp->v_mount->mnt_vnodecovered; - VREF(vp); - fp->f_data = (caddr_t) vp; - fp->f_offset = 0; - vrele(tvp); - goto unionread; +int +getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval) +{ + off_t offset; + ssize_t bytesread; + int error; + + AUDIT_ARG(fd, uap->fd); + error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED); + + if (error == 0) { + *retval = bytesread; + error = copyout((caddr_t)&offset, uap->position, sizeof(off_t)); } - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, - sizeof(long)); - *retval = uap->count - auio.uio_resid; return (error); } + /* * Set the mode mask for creation of filesystem nodes. */ -struct umask_args { - int newmask; -}; -int -umask(p, uap, retval) - struct proc *p; - struct umask_args *uap; - register_t *retval; +#warning XXX implement xsecurity + +#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */ +static int +umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval) { - register struct filedesc *fdp; + struct filedesc *fdp; + AUDIT_ARG(mask, newmask); + proc_fdlock(p); fdp = p->p_fd; *retval = fdp->fd_cmask; - fdp->fd_cmask = uap->newmask & ALLPERMS; + fdp->fd_cmask = newmask & ALLPERMS; + proc_fdunlock(p); return (0); } + +int +umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval) +{ + int ciferror; + kauth_filesec_t xsecdst; + + xsecdst = KAUTH_FILESEC_NONE; + if (uap->xsecurity != USER_ADDR_NULL) { + if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return ciferror; + } else { + xsecdst = KAUTH_FILESEC_NONE; + } + + ciferror = umask1(p, uap->newmask, xsecdst, retval); + + if (xsecdst != KAUTH_FILESEC_NONE) + kauth_filesec_free(xsecdst); + return ciferror; +} + +int +umask(proc_t p, struct umask_args *uap, register_t *retval) +{ + return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval)); +} + /* * Void all references to file by ripping 
underlying filesystem * away from vnode. */ -struct revoke_args { - char *path; -}; /* ARGSUSED */ int -revoke(p, uap, retval) - struct proc *p; - register struct revoke_args *uap; - register_t *retval; +revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct vattr vattr; + vnode_t vp; + struct vnode_attr va; + vfs_context_t ctx = vfs_context_current(); int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); + error = namei(&nd); + if (error) return (error); vp = nd.ni_vp; - if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) + + nameidone(&nd); + +#if CONFIG_MACF + error = mac_vnode_check_revoke(ctx, vp); + if (error) + goto out; +#endif + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + if ((error = vnode_getattr(vp, &va, ctx))) goto out; - if (p->p_ucred->cr_uid != vattr.va_uid && - (error = suser(p->p_ucred, &p->p_acflag))) + if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid && + (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) - VOP_REVOKE(vp, REVOKEALL); + VNOP_REVOKE(vp, REVOKEALL, ctx); out: - vrele(vp); + vnode_put(vp); return (error); } -/* - * Convert a user file descriptor to a kernel file entry. 
- */ -int -getvnode(p, fd, fpp) - struct proc *p; - int fd; - struct file **fpp; -{ - struct file *fp; - int error; - - if (error = fdgetf(p, fd, &fp)) - return (error); - if (fp->f_type != DTYPE_VNODE) - return (EINVAL); - *fpp = fp; - return (0); -} /* * HFS/HFS PlUS SPECIFIC SYSTEM CALLS - * The following 10 system calls are designed to support features + * The following system calls are designed to support features * which are specific to the HFS & HFS Plus volume formats */ +#ifdef __APPLE_API_OBSOLETE + +/************************************************/ +/* *** Following calls will be deleted soon *** */ +/************************************************/ /* * Make a complex file. A complex file is one with multiple forks (data streams) */ -struct mkcomplex_args { - const char *path; /* pathname of the file to be created */ - mode_t mode; /* access mode for the newly created file */ - u_long type; /* format of the complex file */ -}; /* ARGSUSED */ int -mkcomplex(p,uap,retval) - struct proc *p; - register struct mkcomplex_args *uap; - register_t *retval; - +mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused register_t *retval) { - struct vnode *vp; - struct vattr vattr; - int error; - struct nameidata nd; - - /* mkcomplex wants the directory vnode locked so do that here */ - - NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_USERSPACE, (char *)uap->path, p); - if (error = namei(&nd)) - return (error); - - /* Set the attributes as specified by the user */ - - VATTR_NULL(&vattr); - vattr.va_mode = (uap->mode & ACCESSPERMS); - error = VOP_MKCOMPLEX(nd.ni_dvp, &vp, &nd.ni_cnd, &vattr, uap->type); - - /* The mkcomplex call promises to release the parent vnode pointer - * even an an error case so don't do it here unless the operation - * is not supported. In that case, there isn't anyone to unlock the parent - * The vnode pointer to the file will also be released. 
- */ - - if (error) - { - if (error == EOPNOTSUPP) - vput(nd.ni_dvp); - return (error); - } - - return (0); - -} /* end of mkcomplex system call */ - - + return (ENOTSUP); +} /* * Extended stat call which returns volumeid and vnodeid as well as other info */ -struct statv_args { - const char *path; /* pathname of the target file */ - struct vstat *vsb; /* vstat structure for returned info */ -}; /* ARGSUSED */ int -statv(p,uap,retval) - struct proc *p; - register struct statv_args *uap; - register_t *retval; - +statv(__unused proc_t p, + __unused struct statv_args *uap, + __unused register_t *retval) { - return (EOPNOTSUPP); /* We'll just return an error for now */ + return (ENOTSUP); /* We'll just return an error for now */ } /* end of statv system call */ - - /* * Extended lstat call which returns volumeid and vnodeid as well as other info */ -struct lstatv_args { - const char *path; /* pathname of the target file */ - struct vstat *vsb; /* vstat structure for returned info */ -}; /* ARGSUSED */ int -lstatv(p,uap,retval) - struct proc *p; - register struct lstatv_args *uap; - register_t *retval; - +lstatv(__unused proc_t p, + __unused struct lstatv_args *uap, + __unused register_t *retval) { - return (EOPNOTSUPP); /* We'll just return an error for now */ + return (ENOTSUP); /* We'll just return an error for now */ } /* end of lstatv system call */ - - /* * Extended fstat call which returns volumeid and vnodeid as well as other info */ -struct fstatv_args { - int fd; /* file descriptor of the target file */ - struct vstat *vsb; /* vstat structure for returned info */ -}; /* ARGSUSED */ int -fstatv(p,uap,retval) - struct proc *p; - register struct fstatv_args *uap; - register_t *retval; - +fstatv(__unused proc_t p, + __unused struct fstatv_args *uap, + __unused register_t *retval) { - return (EOPNOTSUPP); /* We'll just return an error for now */ + return (ENOTSUP); /* We'll just return an error for now */ } /* end of fstatv system call */ 
+/************************************************/ +/* *** Preceding calls will be deleted soon *** */ +/************************************************/ + +#endif /* __APPLE_API_OBSOLETE */ /* -* Obtain attribute information about a file system object +* Obtain attribute information on objects in a directory while enumerating +* the directory. This call does not yet support union mounted directories. +* TO DO +* 1.union mounted directories. */ -struct getattrlist_args { - const char *path; /* pathname of the target object */ - struct attrlist * alist; /* Attributes desired by the user */ - void * attributeBuffer; /* buffer to hold returned attributes */ - size_t bufferSize; /* size of the return buffer */ - unsigned long options; /* options (follow/don't follow) */ -}; /* ARGSUSED */ int -getattrlist (p,uap,retval) - struct proc *p; - register struct getattrlist_args *uap; - register_t *retval; - +getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *retval) { - int error; - struct nameidata nd; - struct iovec aiov; - struct uio auio; - struct attrlist attributelist; - u_long nameiflags; - - /* Get the attributes desire and do our parameter checking */ - - if (error = copyin((caddr_t)uap->alist, (caddr_t) &attributelist, - sizeof (attributelist))) - { + vnode_t vp; + struct fileproc *fp; + uio_t auio = NULL; + int spacetype = proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; + uint32_t count; + uint32_t newstate; + int error, eofflag; + uint32_t loff; + struct attrlist attributelist; + vfs_context_t ctx = vfs_context_current(); + int fd = uap->fd; + char uio_buf[ UIO_SIZEOF(1) ]; + kauth_action_t action; + + AUDIT_ARG(fd, fd); + + /* Get the attributes into kernel space */ + if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) { return(error); - } - - if (attributelist.bitmapcount != ATTR_BIT_MAP_COUNT -#if 0 - || attributelist.commonattr & ~ATTR_CMN_VALIDMASK || - attributelist.volattr & ~ATTR_VOL_VALIDMASK || - attributelist.dirattr & ~ATTR_DIR_VALIDMASK || - attributelist.fileattr & ~ATTR_FILE_VALIDMASK || - attributelist.forkattr & ~ATTR_FORK_VALIDMASK -#endif - ) - { - return (EINVAL); - } - - /* Get the vnode for the file we are getting info on. */ - nameiflags = LOCKLEAF; - if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path, p); - - if (error = namei(&nd)) - return (error); - - /* Set up the UIO structure for use by the vfs routine */ - - - aiov.iov_base = uap->attributeBuffer; - aiov.iov_len = uap->bufferSize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->bufferSize; - - - error = VOP_GETATTRLIST(nd.ni_vp, &attributelist, &auio, p->p_ucred, p); - - /* Unlock and release the vnode which will have been locked by namei */ - - vput(nd.ni_vp); - - /* return the effort if we got one, otherwise return success */ - - if (error) - { - return (error); - } - - return(0); - -} /* end of getattrlist system call */ - - - -/* - * Set attribute information about a file system object - */ - -struct setattrlist_args { - const char *path; /* pathname of the target object */ - struct attrlist * alist; /* Attributes being set by the user */ - void * attributeBuffer; /* buffer 
with attribute values to be set */ - size_t bufferSize; /* size of the return buffer */ - unsigned long options; /* options (follow/don't follow) */ -}; -/* ARGSUSED */ -int -setattrlist (p,uap,retval) - struct proc *p; - register struct setattrlist_args *uap; - register_t *retval; - -{ - int error; - struct nameidata nd; - struct iovec aiov; - struct uio auio; - struct attrlist alist; - u_long nameiflags; - - /* Get the attributes desired and do our parameter checking */ - - if ((error = copyin((caddr_t)uap->alist, (caddr_t) &alist, - sizeof (alist)))) { + } + if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) { + return(error); + } + if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) { return (error); } + if ((fp->f_fglob->fg_flag & FREAD) == 0) { + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + error = EBADF; + goto out; + } - if (alist.bitmapcount != ATTR_BIT_MAP_COUNT) - return (EINVAL); - /* Get the vnode for the file whose attributes are being set. */ - nameiflags = LOCKLEAF; - if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path, p); - if (error = namei(&nd)) - return (error); +#if CONFIG_MACF + error = mac_file_check_change_offset(vfs_context_ucred(ctx), + fp->f_fglob); + if (error) + goto out; +#endif - /* Set up the UIO structure for use by the vfs routine */ - aiov.iov_base = uap->attributeBuffer; - aiov.iov_len = uap->bufferSize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->bufferSize; - error = VOP_SETATTRLIST(nd.ni_vp, &alist, &auio, p->p_ucred, p); + if ( (error = vnode_getwithref(vp)) ) + goto out; - vput(nd.ni_vp); + AUDIT_ARG(vnpath, vp, ARG_VNODE1); - return (error); + if (vp->v_type != VDIR) { + (void)vnode_put(vp); + error = EINVAL; + goto out; + } -} /* end of setattrlist system call */ +#if CONFIG_MACF + error = 
mac_vnode_check_readdir(ctx, vp); + if (error != 0) { + (void)vnode_put(vp); + goto out; + } +#endif /* MAC */ + /* set up the uio structure which will contain the users return buffer */ + loff = fp->f_fglob->fg_offset; + auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->buffer, uap->buffersize); + + /* + * If the only item requested is file names, we can let that past with + * just LIST_DIRECTORY. If they want any other attributes, that means + * they need SEARCH as well. + */ + action = KAUTH_VNODE_LIST_DIRECTORY; + if ((attributelist.commonattr & ~ATTR_CMN_NAME) || + attributelist.fileattr || attributelist.dirattr) + action |= KAUTH_VNODE_SEARCH; + + if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) { + u_long ulcount = count; -/* -* Obtain attribute information on objects in a directory while enumerating -* the directory. This call does not yet support union mounted directories. -* TO DO -* 1.union mounted directories. 
-*/ + error = VNOP_READDIRATTR(vp, &attributelist, auio, + count, + uap->options, (unsigned long *)&newstate, &eofflag, + &ulcount, ctx); + if (!error) + count = ulcount; + } + (void)vnode_put(vp); -struct getdirentriesattr_args { - int fd; /* file descriptor */ - struct attrlist *alist; /* bit map of requested attributes */ - void *buffer; /* buffer to hold returned attribute info */ - size_t buffersize; /* size of the return buffer */ - u_long *count; /* the count of entries requested/returned */ - u_long *basep; /* the offset of where we are leaving off in buffer */ - u_long *newstate; /* a flag to inform of changes in directory */ - u_long options; /* maybe unused for now */ -}; -/* ARGSUSED */ -int -getdirentriesattr (p,uap,retval) - struct proc *p; - register struct getdirentriesattr_args *uap; - register_t *retval; - -{ - register struct vnode *vp; - struct file *fp; - struct uio auio; - struct iovec aiov; - u_long actualcount; - u_long newstate; - int error, eofflag; - long loff; - struct attrlist attributelist; - - /* Get the attributes into kernel space */ - if (error = copyin((caddr_t)uap->alist, (caddr_t) &attributelist, sizeof (attributelist))) - return(error); - if (error = copyin((caddr_t)uap->count, (caddr_t) &actualcount, sizeof (u_long))) - return(error); - - if (error = getvnode(p, uap->fd, &fp)) - return (error); - if ((fp->f_flag & FREAD) == 0) - return(EBADF); - vp = (struct vnode *)fp->f_data; - - if (vp->v_type != VDIR) - return(EINVAL); + if (error) + goto out; + fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */ - /* set up the uio structure which will contain the users return buffer */ - aiov.iov_base = uap->buffer; - aiov.iov_len = uap->buffersize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->buffersize; - - loff = auio.uio_offset = fp->f_offset; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - 
error = VOP_READDIRATTR (vp, &attributelist, &auio, - actualcount, uap->options, &newstate, &eofflag, - &actualcount, ((u_long **)0), p->p_cred); - - VOP_UNLOCK(vp, 0, p); - if (error) return (error); - fp->f_offset = auio.uio_offset; /* should be multiple of dirent, not variable */ - - if (error = copyout((caddr_t) &actualcount, (caddr_t) uap->count, sizeof(u_long))) - return (error); - if (error = copyout((caddr_t) &newstate, (caddr_t) uap->newstate, sizeof(u_long))) - return (error); - if (error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long))) - return (error); + if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) + goto out; + if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate)))) + goto out; + if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff)))) + goto out; *retval = eofflag; /* similar to getdirentries */ - return (0); /* return error earlier, an retval of 0 or 1 now */ + error = 0; +out: + file_drop(fd); + return (error); /* return error earlier, an retval of 0 or 1 now */ } /* end of getdirentryattr system call */ @@ -3236,205 +6016,206 @@ getdirentriesattr (p,uap,retval) * Exchange data between two files */ -struct exchangedata_args { - const char *path1; /* pathname of the first swapee */ - const char *path2; /* pathname of the second swapee */ - unsigned long options; /* options */ -}; /* ARGSUSED */ int -exchangedata (p,uap,retval) - struct proc *p; - register struct exchangedata_args *uap; - register_t *retval; - +exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused register_t *retval) { struct nameidata fnd, snd; - struct vnode *fvp, *svp; - int error; + vfs_context_t ctx = vfs_context_current(); + vnode_t fvp; + vnode_t svp; + int error; u_long nameiflags; + char *fpath = NULL; + char *spath = NULL; + int flen, slen; + fse_info f_finfo, s_finfo; nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - /* Global lock, to prevent race condition, 
only one exchange at a time */ - lockmgr(&exchangelock, LK_EXCLUSIVE , (struct slock *)0, p); - - NDINIT(&fnd, LOOKUP, nameiflags, UIO_USERSPACE, (char *) uap->path1, p); + NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1, + UIO_USERSPACE, uap->path1, ctx); - if (error = namei(&fnd)) - goto out2; + error = namei(&fnd); + if (error) + goto out2; - fvp = fnd.ni_vp; + nameidone(&fnd); + fvp = fnd.ni_vp; - NDINIT(&snd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path2, p); - - if (error = namei(&snd)) { - vrele(fvp); - goto out2; - } + NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2, + UIO_USERSPACE, uap->path2, ctx); + error = namei(&snd); + if (error) { + vnode_put(fvp); + goto out2; + } + nameidone(&snd); svp = snd.ni_vp; - /* if the files are the same, return an inval error */ + /* + * if the files are the same, return an inval error + */ if (svp == fvp) { - vrele(fvp); - vrele(svp); - error = EINVAL; - goto out2; - } - - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); - vn_lock(svp, LK_EXCLUSIVE | LK_RETRY, p); - - error = VOP_ACCESS(fvp, VWRITE, p->p_ucred, p); - if (error) goto out; - - error = VOP_ACCESS(svp, VWRITE, p->p_ucred, p); - if (error) goto out; - - /* Ok, make the call */ - error = VOP_EXCHANGE (fvp, svp, p->p_ucred, p); + error = EINVAL; + goto out; + } -out: - vput (svp); - vput (fvp); + /* + * if the files are on different volumes, return an error + */ + if (svp->v_mount != fvp->v_mount) { + error = EXDEV; + goto out; + } -out2: - lockmgr(&exchangelock, LK_RELEASE, (struct slock *)0, p); +#if CONFIG_MACF + error = mac_vnode_check_exchangedata(ctx, + fvp, svp); + if (error) + goto out; +#endif + if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) || + ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)) + goto out; - if (error) { - return (error); + if ( +#if CONFIG_FSE + need_fsevent(FSE_EXCHANGE, fvp) || +#endif + 
kauth_authorize_fileop_has_listeners()) { + GET_PATH(fpath); + GET_PATH(spath); + if (fpath == NULL || spath == NULL) { + error = ENOMEM; + goto out; } - - return (0); - -} /* end of exchangedata system call */ - -/* -* Check users access to a file -*/ - -struct checkuseraccess_args { - const char *path; /* pathname of the target file */ - uid_t userid; /* user for whom we are checking access */ - gid_t *groups; /* Group that we are checking for */ - int ngroups; /* Number of groups being checked */ - int accessrequired; /* needed access to the file */ - unsigned long options; /* options */ -}; - -/* ARGSUSED */ -int -checkuseraccess (p,uap,retval) - struct proc *p; - register struct checkuseraccess_args *uap; - register_t *retval; - -{ - register struct vnode *vp; - int error; - struct nameidata nd; - struct ucred cred; - int flags; /*what will actually get passed to access*/ - u_long nameiflags; - - /* Make sure that the number of groups is correct before we do anything */ - - if ((uap->ngroups <= 0) || (uap->ngroups > NGROUPS)) - return (EINVAL); - - /* Verify that the caller is root */ - - if (error = suser(p->p_ucred, &p->p_acflag)) - return(error); - - /* Fill in the credential structure */ - - cred.cr_ref = 0; - cred.cr_uid = uap->userid; - cred.cr_ngroups = uap->ngroups; - if (error = copyin((caddr_t) uap->groups, (caddr_t) &(cred.cr_groups), (sizeof(gid_t))*uap->ngroups)) - return (error); - - /* Get our hands on the file */ - - nameiflags = LOCKLEAF; - if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path, p); - - if (error = namei(&nd)) - return (error); - vp = nd.ni_vp; - - /* Flags == 0 means only check for existence. 
*/ - - flags = 0; - - if (uap->accessrequired) { - if (uap->accessrequired & R_OK) - flags |= VREAD; - if (uap->accessrequired & W_OK) - flags |= VWRITE; - if (uap->accessrequired & X_OK) - flags |= VEXEC; + flen = MAXPATHLEN; + slen = MAXPATHLEN; + if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') { + printf("exchange: vn_getpath(fvp=%p) failed <<%s>>\n", + fvp, fpath); } - error = VOP_ACCESS(vp, flags, &cred, p); - - vput(vp); - - if (error) - return (error); - - return (0); + if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') { + printf("exchange: vn_getpath(svp=%p) failed <<%s>>\n", + svp, spath); + } +#if CONFIG_FSE + get_fse_info(fvp, &f_finfo, ctx); + get_fse_info(svp, &s_finfo, ctx); +#endif + } + /* Ok, make the call */ + error = VNOP_EXCHANGE(fvp, svp, 0, ctx); + + if (error == 0) { + const char *tmpname; + + if (fpath != NULL && spath != NULL) { + /* call out to allow 3rd party notification of exchangedata. + * Ignore result of kauth_authorize_fileop call. + */ + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE, + (uintptr_t)fpath, (uintptr_t)spath); + } + name_cache_lock(); + + tmpname = fvp->v_name; + fvp->v_name = svp->v_name; + svp->v_name = tmpname; + + if (fvp->v_parent != svp->v_parent) { + vnode_t tmp; + + tmp = fvp->v_parent; + fvp->v_parent = svp->v_parent; + svp->v_parent = tmp; + } + name_cache_unlock(); + +#if CONFIG_FSE + if (fpath != NULL && spath != NULL) { + add_fsevent(FSE_EXCHANGE, ctx, + FSE_ARG_STRING, flen, fpath, + FSE_ARG_FINFO, &f_finfo, + FSE_ARG_STRING, slen, spath, + FSE_ARG_FINFO, &s_finfo, + FSE_ARG_DONE); + } +#endif + } -} /* end of checkuseraccess system call */ +out: + if (fpath != NULL) + RELEASE_PATH(fpath); + if (spath != NULL) + RELEASE_PATH(spath); + vnode_put(svp); + vnode_put(fvp); +out2: + return (error); +} -struct searchfs_args { - const char *path; - struct fssearchblock *searchblock; - u_long *nummatches; - u_long scriptcode; - u_long options; - struct searchstate 
*state; - }; /* ARGSUSED */ int -searchfs (p,uap,retval) - struct proc *p; - register struct searchfs_args *uap; - register_t *retval; - +searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval) { - register struct vnode *vp; + vnode_t vp; int error=0; int fserror = 0; struct nameidata nd; - struct fssearchblock searchblock; + struct user_fssearchblock searchblock; struct searchstate *state; struct attrlist *returnattrs; void *searchparams1,*searchparams2; - struct iovec aiov; - struct uio auio; + uio_t auio = NULL; + int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; u_long nummatches; int mallocsize; u_long nameiflags; - + vfs_context_t ctx = vfs_context_current(); + char uio_buf[ UIO_SIZEOF(1) ]; /* Start by copying in fsearchblock paramater list */ - - if (error = copyin((caddr_t) uap->searchblock, (caddr_t) &searchblock,sizeof(struct fssearchblock))) + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock)); + } + else { + struct fssearchblock tmp_searchblock; + error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock)); + // munge into 64-bit version + searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs); + searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer); + searchblock.returnbuffersize = tmp_searchblock.returnbuffersize; + searchblock.maxmatches = tmp_searchblock.maxmatches; + searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec; + searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec; + searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1); + searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1; + searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2); + searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2; + searchblock.searchattrs = tmp_searchblock.searchattrs; + } + if (error) return(error); + /* Do a 
sanity check on sizeofsearchparams1 and sizeofsearchparams2. + */ + if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || + searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) + return(EINVAL); + /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */ /* It all has to do into local memory and it's not that big so we might as well put it all together. */ /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/ /* block. */ - mallocsize = searchblock.sizeofsearchparams1+searchblock.sizeofsearchparams2 + + mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 + sizeof(struct attrlist) + sizeof(struct searchstate); MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK); @@ -3447,36 +6228,34 @@ searchfs (p,uap,retval) /* Now copy in the stuff given our local variables. */ - if (error = copyin((caddr_t) searchblock.searchparams1, searchparams1,searchblock.sizeofsearchparams1)) + if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) goto freeandexit; - if (error = copyin((caddr_t) searchblock.searchparams2, searchparams2,searchblock.sizeofsearchparams2)) + if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) goto freeandexit; - if (error = copyin((caddr_t) searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))) + if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) goto freeandexit; - if (error = copyin((caddr_t) uap->state, (caddr_t) state, sizeof(struct searchstate))) + if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) goto freeandexit; /* set up the uio structure which will contain the users return buffer */ - aiov.iov_base = searchblock.returnbuffer; - aiov.iov_len = searchblock.returnbuffersize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - 
auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = searchblock.returnbuffersize; + auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize); - nameiflags = LOCKLEAF; + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path, p); + NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); - if (error = namei(&nd)) + error = namei(&nd); + if (error) goto freeandexit; + nameidone(&nd); vp = nd.ni_vp; @@ -3497,7 +6276,7 @@ searchfs (p,uap,retval) from copying out any results... */ - fserror = VOP_SEARCHFS(vp, + fserror = VNOP_SEARCHFS(vp, searchparams1, searchparams2, &searchblock.searchattrs, @@ -3507,20 +6286,21 @@ searchfs (p,uap,retval) &nummatches, uap->scriptcode, uap->options, - &auio, - state); + auio, + state, + ctx); saveandexit: - vput(vp); + vnode_put(vp); /* Now copy out the stuff that needs copying out. That means the number of matches, the search state. Everything was already put into he return buffer by the vop call. 
*/ - if (error = copyout((caddr_t) state, (caddr_t) uap->state, sizeof(struct searchstate))) + if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) goto freeandexit; - if (error = copyout((caddr_t) &nummatches, (caddr_t) uap->nummatches, sizeof(u_long))) + if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) goto freeandexit; error = fserror; @@ -3538,46 +6318,45 @@ freeandexit: /* * Make a filesystem-specific control call: */ -struct fsctl_args { - const char *path; /* pathname of the target object */ - u_long cmd; /* cmd (also encodes size/direction of arguments a la ioctl) */ - caddr_t data; /* pointer to argument buffer */ - u_long options; /* options for fsctl processing */ -}; /* ARGSUSED */ int -fsctl (p,uap,retval) - struct proc *p; - struct fsctl_args *uap; - register_t *retval; - +fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval) { int error; + boolean_t is64bit; struct nameidata nd; u_long nameiflags; u_long cmd = uap->cmd; - register u_int size; + u_int size; #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; caddr_t data, memp; + vfs_context_t ctx = vfs_context_current(); size = IOCPARM_LEN(cmd); if (size > IOCPARM_MAX) return (EINVAL); + is64bit = proc_is64bit(p); + memp = NULL; if (size > sizeof (stkbuf)) { if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM; data = memp; } else { - data = stkbuf; + data = &stkbuf[0]; }; if (cmd & IOC_IN) { if (size) { - error = copyin(uap->data, data, (u_int)size); + error = copyin(uap->data, data, size); if (error) goto FSCtl_Exit; } else { - *(caddr_t *)data = uap->data; + if (is64bit) { + *(user_addr_t *)data = uap->data; + } + else { + *(uint32_t *)data = (uint32_t)uap->data; + } }; } else if ((cmd & IOC_OUT) && size) { /* @@ -3585,25 +6364,42 @@ fsctl (p,uap,retval) * gets back something deterministic. 
*/ bzero(data, size); - } else if (cmd & IOC_VOID) - *(caddr_t *)data = uap->data; + } else if (cmd & IOC_VOID) { + if (is64bit) { + *(user_addr_t *)data = uap->data; + } + else { + *(uint32_t *)data = (uint32_t)uap->data; + } + } /* Get the vnode for the file we are getting info on: */ - nameiflags = LOCKLEAF; + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path, p); - if (error = namei(&nd)) goto FSCtl_Exit; - + NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx); + if ((error = namei(&nd))) goto FSCtl_Exit; + +#if CONFIG_MACF + error = mac_mount_check_fsctl(ctx, vnode_mount(nd.ni_vp), cmd); + if (error) { + vnode_put(nd.ni_vp); + nameidone(&nd); + goto FSCtl_Exit; + } +#endif + /* Invoke the filesystem-specific code */ - error = VOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, p->p_ucred, p); + error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, ctx); - vput(nd.ni_vp); + vnode_put(nd.ni_vp); + nameidone(&nd); /* * Copy any data to user, size was * already set and checked above. */ - if (error == 0 && (cmd & IOC_OUT) && size) error = copyout(data, uap->data, (u_int)size); + if (error == 0 && (cmd & IOC_OUT) && size) + error = copyout(data, uap->data, size); FSCtl_Exit: if (memp) kfree(memp, size); @@ -3618,19 +6414,606 @@ FSCtl_Exit: __private_extern__ int sync_internal(void) { - boolean_t funnel_state; int error; struct sync_args data; int retval[2]; - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = sync(current_proc(), &data, &retval); + error = sync(current_proc(), &data, &retval[0]); - thread_funnel_set(kernel_flock, funnel_state); return (error); } /* end of sync_internal call */ + +/* + * Retrieve the data of an extended attribute. 
+ */ +int +getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval) +{ + vnode_t vp; + struct nameidata nd; + char attrname[XATTR_MAXNAMELEN+1]; + vfs_context_t ctx = vfs_context_current(); + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + size_t namelen; + u_long nameiflags; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + + if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); + + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + goto out; + } + if (xattr_protected(attrname)) { + error = EPERM; + goto out; + } + if (uap->value && uap->size > 0) { + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + } + + error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx); +out: + vnode_put(vp); + + if (auio) { + *retval = uap->size - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + + return (error); +} + +/* + * Retrieve the data of an extended attribute. + */ +int +fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval) +{ + vnode_t vp; + char attrname[XATTR_MAXNAMELEN+1]; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + size_t namelen; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + goto out; + } + if (xattr_protected(attrname)) { + error = EPERM; + goto out; + } + if (uap->value && uap->size > 0) { + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + } + + error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current()); +out: + (void)vnode_put(vp); + file_drop(uap->fd); + + if (auio) { + *retval = uap->size - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + return (error); +} + +/* + * Set the data of an extended attribute. + */ +int +setxattr(proc_t p, struct setxattr_args *uap, int *retval) +{ + vnode_t vp; + struct nameidata nd; + char attrname[XATTR_MAXNAMELEN+1]; + vfs_context_t ctx = vfs_context_current(); + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t namelen; + u_long nameiflags; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + + if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + if (uap->size != 0 && uap->value == 0) { + return (EINVAL); + } + + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 
0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); + + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + + error = vn_setxattr(vp, attrname, auio, uap->options, ctx); +#if CONFIG_FSE + if (error == 0) { + add_fsevent(FSE_XATTR_MODIFIED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif + vnode_put(vp); + *retval = 0; + return (error); +} + +/* + * Set the data of an extended attribute. + */ +int +fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval) +{ + vnode_t vp; + char attrname[XATTR_MAXNAMELEN+1]; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t namelen; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + vfs_context_t ctx = vfs_context_current(); + + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + if (uap->size != 0 && uap->value == 0) { + return (EINVAL); + } + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + + error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current()); +#if CONFIG_FSE + if (error == 0) { + add_fsevent(FSE_XATTR_MODIFIED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif + vnode_put(vp); + file_drop(uap->fd); + *retval = 0; + return (error); +} + +/* + * Remove an extended attribute. 
+ */ +#warning "code duplication" +int +removexattr(proc_t p, struct removexattr_args *uap, int *retval) +{ + vnode_t vp; + struct nameidata nd; + char attrname[XATTR_MAXNAMELEN+1]; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + vfs_context_t ctx = vfs_context_current(); + size_t namelen; + u_long nameiflags; + int error; + + if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen); + if (error != 0) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); + + error = vn_removexattr(vp, attrname, uap->options, ctx); +#if CONFIG_FSE + if (error == 0) { + add_fsevent(FSE_XATTR_REMOVED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif + vnode_put(vp); + *retval = 0; + return (error); +} + +/* + * Remove an extended attribute. 
+ */ +#warning "code duplication" +int +fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval) +{ + vnode_t vp; + char attrname[XATTR_MAXNAMELEN+1]; + size_t namelen; + int error; + vfs_context_t ctx = vfs_context_current(); + + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen); + if (error != 0) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + + error = vn_removexattr(vp, attrname, uap->options, vfs_context_current()); +#if CONFIG_FSE + if (error == 0) { + add_fsevent(FSE_XATTR_REMOVED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif + vnode_put(vp); + file_drop(uap->fd); + *retval = 0; + return (error); +} + +/* + * Retrieve the list of extended attribute names. + */ +#warning "code duplication" +int +listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) +{ + vnode_t vp; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + u_long nameiflags; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + + if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); + if (uap->namebuf != 0 && uap->bufsize > 0) { + // LP64todo - fix this! 
+ auio = uio_createwithbuffer(1, 0, spacetype, + UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->namebuf, uap->bufsize); + } + + error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx); + + vnode_put(vp); + if (auio) { + *retval = (user_ssize_t)uap->bufsize - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + return (error); +} + +/* + * Retrieve the list of extended attribute names. + */ +#warning "code duplication" +int +flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) +{ + vnode_t vp; + uio_t auio = NULL; + int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) + return (EINVAL); + + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + if (uap->namebuf != 0 && uap->bufsize > 0) { + // LP64todo - fix this! + auio = uio_createwithbuffer(1, 0, spacetype, + UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->namebuf, uap->bufsize); + } + + error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current()); + + vnode_put(vp); + file_drop(uap->fd); + if (auio) { + *retval = (user_ssize_t)uap->bufsize - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + return (error); +} + +/* + * Common routine to handle various flavors of statfs data heading out + * to user space. 
+ * + * Returns: 0 Success + * EFAULT + */ +static int +munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp, int *sizep, boolean_t is_64_bit, + boolean_t partial_copy) +{ + int error; + int my_size, copy_size; + + if (is_64_bit) { + struct user_statfs sfs; + my_size = copy_size = sizeof(sfs); + bzero(&sfs, my_size); + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + sfs.f_reserved1 = (short)sfsp->f_fssubtype; + sfs.f_bsize = (user_long_t)sfsp->f_bsize; + sfs.f_iosize = (user_long_t)sfsp->f_iosize; + sfs.f_blocks = (user_long_t)sfsp->f_blocks; + sfs.f_bfree = (user_long_t)sfsp->f_bfree; + sfs.f_bavail = (user_long_t)sfsp->f_bavail; + sfs.f_files = (user_long_t)sfsp->f_files; + sfs.f_ffree = (user_long_t)sfsp->f_ffree; + sfs.f_fsid = sfsp->f_fsid; + sfs.f_owner = sfsp->f_owner; + strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN); + strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN); + strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN); + + if (partial_copy) { + copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4)); + } + error = copyout((caddr_t)&sfs, bufp, copy_size); + } + else { + struct statfs sfs; + my_size = copy_size = sizeof(sfs); + bzero(&sfs, my_size); + + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + sfs.f_reserved1 = (short)sfsp->f_fssubtype; + + /* + * It's possible for there to be more than 2^^31 blocks in the filesystem, so we + * have to fudge the numbers here in that case. We inflate the blocksize in order + * to reflect the filesystem size as best we can. + */ + if ((sfsp->f_blocks > LONG_MAX) + /* Hack for 4061702 . I think the real fix is for Carbon to + * look for some volume capability and not depend on hidden + * semantics agreed between a FS and carbon. + * f_blocks, f_bfree, and f_bavail set to -1 is the trigger + * for Carbon to set bNoVolumeSizes volume attribute. 
+ * Without this the webdavfs files cannot be copied onto + * disk as they look huge. This change should not affect + * XSAN as they should not setting these to -1.. + */ + && (sfsp->f_blocks != 0xffffffffffffffffULL) + && (sfsp->f_bfree != 0xffffffffffffffffULL) + && (sfsp->f_bavail != 0xffffffffffffffffULL)) { + int shift; + + /* + * Work out how far we have to shift the block count down to make it fit. + * Note that it's possible to have to shift so far that the resulting + * blocksize would be unreportably large. At that point, we will clip + * any values that don't fit. + * + * For safety's sake, we also ensure that f_iosize is never reported as + * being smaller than f_bsize. + */ + for (shift = 0; shift < 32; shift++) { + if ((sfsp->f_blocks >> shift) <= LONG_MAX) + break; + if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX) + break; + } +#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s))) + sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift); + sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift); + sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift); +#undef __SHIFT_OR_CLIP + sfs.f_bsize = (long)(sfsp->f_bsize << shift); + sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize); + } else { + /* filesystem is small enough to be reported honestly */ + sfs.f_bsize = (long)sfsp->f_bsize; + sfs.f_iosize = (long)sfsp->f_iosize; + sfs.f_blocks = (long)sfsp->f_blocks; + sfs.f_bfree = (long)sfsp->f_bfree; + sfs.f_bavail = (long)sfsp->f_bavail; + } + sfs.f_files = (long)sfsp->f_files; + sfs.f_ffree = (long)sfsp->f_ffree; + sfs.f_fsid = sfsp->f_fsid; + sfs.f_owner = sfsp->f_owner; + strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN); + strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN); + strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN); + + if (partial_copy) { + copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4)); + } + error = copyout((caddr_t)&sfs, bufp, copy_size); 
+ } + + if (sizep != NULL) { + *sizep = my_size; + } + return(error); +} + +/* + * copy stat structure into user_stat structure. + */ +void munge_stat(struct stat *sbp, struct user_stat *usbp) +{ + bzero(usbp, sizeof(struct user_stat)); + + usbp->st_dev = sbp->st_dev; + usbp->st_ino = sbp->st_ino; + usbp->st_mode = sbp->st_mode; + usbp->st_nlink = sbp->st_nlink; + usbp->st_uid = sbp->st_uid; + usbp->st_gid = sbp->st_gid; + usbp->st_rdev = sbp->st_rdev; +#ifndef _POSIX_C_SOURCE + usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec; + usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec; + usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec; + usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec; + usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec; + usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec; +#else + usbp->st_atime = sbp->st_atime; + usbp->st_atimensec = sbp->st_atimensec; + usbp->st_mtime = sbp->st_mtime; + usbp->st_mtimensec = sbp->st_mtimensec; + usbp->st_ctime = sbp->st_ctime; + usbp->st_ctimensec = sbp->st_ctimensec; +#endif + usbp->st_size = sbp->st_size; + usbp->st_blocks = sbp->st_blocks; + usbp->st_blksize = sbp->st_blksize; + usbp->st_flags = sbp->st_flags; + usbp->st_gen = sbp->st_gen; + usbp->st_lspare = sbp->st_lspare; + usbp->st_qspare[0] = sbp->st_qspare[0]; + usbp->st_qspare[1] = sbp->st_qspare[1]; +} + +/* + * copy stat64 structure into user_stat64 structure. 
+ */ +void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp) +{ + bzero(usbp, sizeof(struct user_stat)); + + usbp->st_dev = sbp->st_dev; + usbp->st_ino = sbp->st_ino; + usbp->st_mode = sbp->st_mode; + usbp->st_nlink = sbp->st_nlink; + usbp->st_uid = sbp->st_uid; + usbp->st_gid = sbp->st_gid; + usbp->st_rdev = sbp->st_rdev; +#ifndef _POSIX_C_SOURCE + usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec; + usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec; + usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec; + usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec; + usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec; + usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec; + usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec; + usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec; +#else + usbp->st_atime = sbp->st_atime; + usbp->st_atimensec = sbp->st_atimensec; + usbp->st_mtime = sbp->st_mtime; + usbp->st_mtimensec = sbp->st_mtimensec; + usbp->st_ctime = sbp->st_ctime; + usbp->st_ctimensec = sbp->st_ctimensec; + usbp->st_birthtime = sbp->st_birthtime; + usbp->st_birthtimensec = sbp->st_birthtimensec; +#endif + usbp->st_size = sbp->st_size; + usbp->st_blocks = sbp->st_blocks; + usbp->st_blksize = sbp->st_blksize; + usbp->st_flags = sbp->st_flags; + usbp->st_gen = sbp->st_gen; + usbp->st_lspare = sbp->st_lspare; + usbp->st_qspare[0] = sbp->st_qspare[0]; + usbp->st_qspare[1] = sbp->st_qspare[1]; +}