bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/sysproto.h>
  96 #include <sys/xattr.h>
  97 #include <sys/ubc_internal.h>
  98 #include <machine/cons.h>
  99 #include <machine/limits.h>
 100 #include <miscfs/specfs/specdev.h>
 101 #include <miscfs/union/union.h>
 102
 103 #include <bsm/audit_kernel.h>
 104 #include <bsm/audit_kevents.h>
 105
 106 #include <mach/mach_types.h>
 107 #include <kern/kern_types.h>
 108 #include <kern/kalloc.h>
 109
 110 #include <vm/vm_pageout.h>
 111
 112 #include <libkern/OSAtomic.h>
 113
 114 #if CONFIG_MACF
 115 #include <security/mac.h>
 116 #include <security/mac_framework.h>
 117 #endif
 118
 119 #if CONFIG_FSE /* GET_PATH(x)/RELEASE_PATH(x): obtain a pathname buffer into x and give it back */
 120 #define GET_PATH(x) \
 121         (x) = get_pathbuff(); /* NOTE: expansion already ends in ';', so a call site's own ';' adds an empty statement */
 122 #define RELEASE_PATH(x) \
 123         release_pathbuff(x);
 124 #else /* !CONFIG_FSE: same two macros, built on MALLOC_ZONE/FREE_ZONE with size MAXPATHLEN and type M_NAMEI */
 125 #define GET_PATH(x)     \
 126         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
 127 #define RELEASE_PATH(x) \
 128         FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
 129 #endif /* CONFIG_FSE */
 130
 131 /* struct for checkdirs iteration; presumably handed to checkdirs_callback() as its arg -- TODO confirm */
 132 struct cdirargs {
 133         vnode_t olddp; /* NOTE(review): by its name, the directory vnode being replaced -- confirm in checkdirs() */
 134         vnode_t newdp; /* NOTE(review): by its name, the vnode that takes its place -- confirm in checkdirs() */
 135 };
 136 /* callback for checkdirs iteration */
 137 static int checkdirs_callback(proc_t p, void * arg);
 138 /* Forward declarations of the helpers used in this file; all are static except enablequotas(). */
 139 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
 140 static int checkdirs(vnode_t olddp, vfs_context_t ctx); /* called by __mac_mount() after a new mount is attached to its covered vnode */
 141 void enablequotas(struct mount *mp, vfs_context_t ctx); /* not static, so visible outside this file; called by __mac_mount() on success */
 142 static int getfsstat_callback(mount_t mp, void * arg);
 143 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
 144 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
 145 static int sync_callback(mount_t, void *);
 146 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 147                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
 148                                                 boolean_t partial_copy);
 149 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp);
 150 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t); /* a function-pointer object defined here, not a prototype; no initializer, so it starts out NULL */
 151
 152 __private_extern__ /* the three declarations below are non-static and marked __private_extern__ */
 153 int sync_internal(void);
 154
 155 __private_extern__
 156 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *);
 157
 158 __private_extern__
 159 int unlink1(vfs_context_t, struct nameidata *, int);
 160
 161
 162 #ifdef __APPLE_API_OBSOLETE /* argument structures and prototypes for fstatv/lstatv/mkcomplex/statv, compiled only when this macro is defined */
 163 struct fstatv_args { /* arguments of fstatv() */
 164        int fd;                  /* file descriptor of the target file */
 165        struct vstat *vsb;       /* vstat structure for returned info  */
 166 };
 167 struct lstatv_args { /* arguments of lstatv() */
 168        const char *path;        /* pathname of the target file       */
 169        struct vstat *vsb;       /* vstat structure for returned info */
 170 };
 171 struct mkcomplex_args { /* arguments of mkcomplex() */
 172         const char *path;       /* pathname of the file to be created */
 173                 mode_t mode;            /* access mode for the newly created file */
 174         u_long type;            /* format of the complex file */
 175 };
 176 struct statv_args { /* arguments of statv() */
 177         const char *path;       /* pathname of the target file       */
 178         struct vstat *vsb;      /* vstat structure for returned info */
 179 };
 180
 181 int fstatv(proc_t p, struct fstatv_args *uap, register_t *retval);
 182 int lstatv(proc_t p, struct lstatv_args *uap, register_t *retval);
 183 int mkcomplex(proc_t p, struct mkcomplex_args *uap, register_t *retval);
 184 int statv(proc_t p, struct statv_args *uap, register_t *retval);
 185
 186 #endif /* __APPLE_API_OBSOLETE */
 187
 188 /*
 189  * Incremented each time a mount or unmount operation occurs;
 190  * used to invalidate the cached value of the rootvp in the
 191  * mount structure utilized by cache_lookup_path.
 192  */
 193 int mount_generation = 0; /* __mac_mount() bumps this between name_cache_lock() and name_cache_unlock() */
 194
 195 /* Counts mount and unmount operations; __mac_mount() adds 1 with OSAddAtomic() after a successful mount. */
 196 unsigned int vfs_nummntops=0;
 197
 198 extern struct fileops vnops; /* declared here only; the definition is not in the visible part of this file */
 199 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 200
 201
 202 /*
 203  * Virtual File System System Calls
 204  */
 205
 206 /* Mount a file system: forward type, path, flags and data from uap to
 207  * __mac_mount() with mac_p set to USER_ADDR_NULL (no MAC label supplied),
 208  * and hand back __mac_mount()'s return value unchanged. */
 209 /* ARGSUSED */
 210 int
 211 mount(proc_t p, struct mount_args *uap, __unused register_t *retval)
 212 {
 213         struct __mac_mount_args mac_args;
 214
 215         mac_args.mac_p = USER_ADDR_NULL; /* this entry point carries no label argument */
 216         mac_args.data = uap->data;
 217         mac_args.flags = uap->flags;
 218         mac_args.path = uap->path;
 219         mac_args.type = uap->type;
 220         return __mac_mount(p, &mac_args, retval);
 221 }
 222
 223 int
 224 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused register_t *retval)
 225 {
 226         struct vnode *vp;
 227         struct vnode *devvp = NULLVP;
 228         struct vnode *device_vnode = NULLVP;
 229 #if CONFIG_MACF
 230         struct vnode *rvp;
 231 #endif
 232         struct mount *mp;
 233         struct vfstable *vfsp = (struct vfstable *)0;
 234         int error, flag = 0;
 235         struct vnode_attr va;
 236         vfs_context_t ctx = vfs_context_current();
 237         struct nameidata nd;
 238         struct nameidata nd1;
 239         char fstypename[MFSNAMELEN];
 240         size_t dummy=0;
 241         user_addr_t devpath = USER_ADDR_NULL;
 242         user_addr_t fsmountargs =  uap->data;
 243         int ronly = 0;
 244         int mntalloc = 0;
 245         mode_t accessmode;
 246         boolean_t is_64bit;
 247         boolean_t is_rwlock_locked = FALSE;
 248
 249         AUDIT_ARG(fflags, uap->flags);
 250
 251         is_64bit = proc_is64bit(p);
 252
 253         /*
 254          * Get vnode to be covered
 255          */
 256         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
 257                    UIO_USERSPACE, uap->path, ctx);
 258         error = namei(&nd);
 259         if (error)
 260                 return (error);
 261         vp = nd.ni_vp;
 262
 263         if ((vp->v_flag & VROOT) &&
 264                 (vp->v_mount->mnt_flag & MNT_ROOTFS))
 265                         uap->flags |= MNT_UPDATE;
 266
 267         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 268         if (error)
 269                 goto out1;
 270
 271         if (uap->flags & MNT_UPDATE) {
 272                 if ((vp->v_flag & VROOT) == 0) {
 273                         error = EINVAL;
 274                         goto out1;
 275                 }
 276                 mp = vp->v_mount;
 277
 278                 /* unmount in progress return error */
 279                 mount_lock(mp);
 280                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 281                         mount_unlock(mp);
 282                         error = EBUSY;
 283                         goto out1;
 284                 }
 285                 mount_unlock(mp);
 286                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 287                 is_rwlock_locked = TRUE;
 288                 /*
 289                  * We only allow the filesystem to be reloaded if it
 290                  * is currently mounted read-only.
 291                  */
 292                 if ((uap->flags & MNT_RELOAD) &&
 293                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 294                         error = ENOTSUP;
 295                         goto out1;
 296                 }
 297                 /*
 298                  * Only root, or the user that did the original mount is
 299                  * permitted to update it.
 300                  */
 301                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 302                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 303                         goto out1;
 304                 }
 305 #if CONFIG_MACF
 306                 error = mac_mount_check_remount(ctx, mp);
 307                 if (error != 0) {
 308                         lck_rw_done(&mp->mnt_rwlock);
 309                         goto out1;
 310                 }
 311 #endif
 312                 /*
 313                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 314                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 315                  */
 316                 if (suser(vfs_context_ucred(ctx), NULL)) {
 317                         uap->flags |= MNT_NOSUID | MNT_NODEV;
 318                         if (mp->mnt_flag & MNT_NOEXEC)
 319                                 uap->flags |= MNT_NOEXEC;
 320                 }
 321                 flag = mp->mnt_flag;
 322
 323                 mp->mnt_flag |=
 324                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 325
 326                 vfsp = mp->mnt_vtable;
 327                 goto update;
 328         }
 329         /*
 330          * If the user is not root, ensure that they own the directory
 331          * onto which we are attempting to mount.
 332          */
 333         VATTR_INIT(&va);
 334         VATTR_WANTED(&va, va_uid);
 335         if ((error = vnode_getattr(vp, &va, ctx)) ||
 336             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 337              (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
 338                 goto out1;
 339         }
 340         /*
 341          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 342          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 343          */
 344         if (suser(vfs_context_ucred(ctx), NULL)) {
 345                 uap->flags |= MNT_NOSUID | MNT_NODEV;
 346                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 347                         uap->flags |= MNT_NOEXEC;
 348         }
 349         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 350                 goto out1;
 351
 352         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 353                 goto out1;
 354
 355         if (vp->v_type != VDIR) {
 356                 error = ENOTDIR;
 357                 goto out1;
 358         }
 359
 360         /* XXXAUDIT: Should we capture the type on the error path as well? */
 361         AUDIT_ARG(text, fstypename);
 362         mount_list_lock();
 363         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 364                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN))
 365                         break;
 366         mount_list_unlock();
 367         if (vfsp == NULL) {
 368                 error = ENODEV;
 369                 goto out1;
 370         }
 371 #if CONFIG_MACF
 372         error = mac_mount_check_mount(ctx, vp,
 373             &nd.ni_cnd, vfsp->vfc_name);
 374         if (error != 0)
 375                 goto out1;
 376 #endif
 377         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 378                 error = EBUSY;
 379                 goto out1;
 380         }
 381         vnode_lock_spin(vp);
 382         SET(vp->v_flag, VMOUNT);
 383         vnode_unlock(vp);
 384
 385         /*
 386          * Allocate and initialize the filesystem.
 387          */
 388         MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount),
 389                 M_MOUNT, M_WAITOK);
 390         bzero((char *)mp, (u_long)sizeof(struct mount));
 391         mntalloc = 1;
 392
 393         /* Initialize the default IO constraints */
 394         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 395         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 396         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 397         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 398         mp->mnt_devblocksize = DEV_BSIZE;
 399         mp->mnt_alignmentmask = PAGE_MASK;
 400         mp->mnt_ioflags = 0;
 401         mp->mnt_realrootvp = NULLVP;
 402         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 403
 404         TAILQ_INIT(&mp->mnt_vnodelist);
 405         TAILQ_INIT(&mp->mnt_workerqueue);
 406         TAILQ_INIT(&mp->mnt_newvnodes);
 407         mount_lock_init(mp);
 408         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 409         is_rwlock_locked = TRUE;
 410         mp->mnt_op = vfsp->vfc_vfsops;
 411         mp->mnt_vtable = vfsp;
 412         mount_list_lock();
 413         vfsp->vfc_refcount++;
 414         mount_list_unlock();
 415         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 416         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 417         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 418         strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 419         mp->mnt_vnodecovered = vp;
 420         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 421
 422         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 423         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 424
 425 update:
 426         /*
 427          * Set the mount level flags.
 428          */
 429         if (uap->flags & MNT_RDONLY)
 430                 mp->mnt_flag |= MNT_RDONLY;
 431         else if (mp->mnt_flag & MNT_RDONLY)
 432                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 433         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 434                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 435                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 436                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 437         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 438                                       MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 439                                       MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 440                                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 441
 442 #if CONFIG_MACF
 443         if (uap->flags & MNT_MULTILABEL) {
 444                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 445                         error = EINVAL;
 446                         goto out1;
 447                 }
 448                 mp->mnt_flag |= MNT_MULTILABEL;
 449         }
 450 #endif
 451
 452         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 453                 if (is_64bit) {
 454                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 455                                 goto out1;
 456                         fsmountargs += sizeof(devpath);
 457                 } else {
 458                         char *tmp;
 459                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 460                                 goto out1;
 461                         /* munge into LP64 addr */
 462                         devpath = CAST_USER_ADDR_T(tmp);
 463                         fsmountargs += sizeof(tmp);
 464                 }
 465
 466                 /* if it is not update and device name needs to be parsed */
 467                 if ((devpath)) {
 468                         NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 469                         if ( (error = namei(&nd1)) )
 470                                 goto out1;
 471
 472                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
 473                         devvp = nd1.ni_vp;
 474
 475                         nameidone(&nd1);
 476
 477                         if (devvp->v_type != VBLK) {
 478                                 error = ENOTBLK;
 479                                 goto out2;
 480                         }
 481                         if (major(devvp->v_rdev) >= nblkdev) {
 482                                 error = ENXIO;
 483                                 goto out2;
 484                         }
 485                         /*
 486                         * If mount by non-root, then verify that user has necessary
 487                         * permissions on the device.
 488                         */
 489                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 490                                 accessmode = KAUTH_VNODE_READ_DATA;
 491                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 492                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 493                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 494                                         goto out2;
 495                         }
 496                 }
 497                 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
 498                         if ( (error = vnode_ref(devvp)) )
 499                                 goto out2;
 500                         /*
 501                         * Disallow multiple mounts of the same device.
 502                         * Disallow mounting of a device that is currently in use
 503                         * (except for root, which might share swap device for miniroot).
 504                         * Flush out any old buffers remaining from a previous use.
 505                         */
 506                         if ( (error = vfs_mountedon(devvp)) )
 507                                 goto out3;
 508
 509                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 510                                 error = EBUSY;
 511                                 goto out3;
 512                         }
 513                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 514                                 error = ENOTBLK;
 515                                 goto out3;
 516                         }
 517                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 518                                 goto out3;
 519
 520                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 521 #if CONFIG_MACF
 522                         error = mac_vnode_check_open(ctx,
 523                             devvp,
 524                             ronly ? FREAD : FREAD|FWRITE);
 525                         if (error)
 526                                 goto out3;
 527 #endif /* MAC */
 528                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 529                                 goto out3;
 530
 531                         mp->mnt_devvp = devvp;
 532                         device_vnode = devvp;
 533                 } else {
 534                         if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 535                                 /*
 536                                  * If upgrade to read-write by non-root, then verify
 537                                  * that user has necessary permissions on the device.
 538                                  */
 539                                 device_vnode = mp->mnt_devvp;
 540                                 if (device_vnode && suser(vfs_context_ucred(ctx), NULL)) {
 541                                         if ((error = vnode_authorize(device_vnode, NULL,
 542                                                  KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)
 543                                                 goto out2;
 544                                 }
 545                         }
 546                         device_vnode = NULLVP;
 547                 }
 548         }
 549 #if CONFIG_MACF
 550         if ((uap->flags & MNT_UPDATE) == 0) {
 551                 mac_mount_label_init(mp);
 552                 mac_mount_label_associate(ctx, mp);
 553         }
 554         if (uap->mac_p != USER_ADDR_NULL) {
 555                 struct user_mac mac;
 556                 char *labelstr = NULL;
 557                 size_t ulen = 0;
 558
 559                 if ((uap->flags & MNT_UPDATE) != 0) {
 560                         error = mac_mount_check_label_update(
 561                             ctx, mp);
 562                         if (error != 0)
 563                                 goto out3;
 564                 }
 565                 if (is_64bit) {
 566                         error = copyin(uap->mac_p, &mac, sizeof(mac));
 567                 } else {
 568                         struct mac mac32;
 569                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 570                         mac.m_buflen = mac32.m_buflen;
 571                         mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
 572                 }
 573                 if (error != 0)
 574                         goto out3;
 575                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 576                     (mac.m_buflen < 2)) {
 577                         error = EINVAL;
 578                         goto out3;
 579                 }
 580                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 581                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 582                 if (error != 0) {
 583                         FREE(labelstr, M_MACTEMP);
 584                         goto out3;
 585                 }
 586                 AUDIT_ARG(mac_string, labelstr);
 587                 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
 588                 FREE(labelstr, M_MACTEMP);
 589                 if (error != 0)
 590                         goto out3;
 591         }
 592 #endif
 593         /*
 594          * Mount the filesystem.
 595          */
 596         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 597
 598         if (uap->flags & MNT_UPDATE) {
 599                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 600                         mp->mnt_flag &= ~MNT_RDONLY;
 601                 mp->mnt_flag &=~
 602                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 603                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 604                 if (error)
 605                         mp->mnt_flag = flag;
 606                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 607                 lck_rw_done(&mp->mnt_rwlock);
 608                 is_rwlock_locked = FALSE;
 609                 if (!error)
 610                         enablequotas(mp, ctx);
 611                 goto out2;
 612         }
 613         /*
 614          * Put the new filesystem on the mount list after root.
 615          */
 616         if (error == 0) {
 617                 struct vfs_attr vfsattr;
 618 #if CONFIG_MACF
 619                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 620                         error = VFS_ROOT(mp, &rvp, ctx);
 621                         if (error) {
 622                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 623                                 goto out3;
 624                         }
 625
 626                         /* VFS_ROOT provides reference so needref = 0 */
 627                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 628                         if (error)
 629                                 goto out3;
 630                 }
 631 #endif  /* MAC */
 632
 633                 vnode_lock_spin(vp);
 634                 CLR(vp->v_flag, VMOUNT);
 635                 vp->v_mountedhere = mp;
 636                 vnode_unlock(vp);
 637
 638                 /*
 639                  * taking the name_cache_lock exclusively will
 640                  * insure that everyone is out of the fast path who
 641                  * might be trying to use a now stale copy of
 642                  * vp->v_mountedhere->mnt_realrootvp
 643                  * bumping mount_generation causes the cached values
 644                  * to be invalidated
 645                  */
 646                 name_cache_lock();
 647                 mount_generation++;
 648                 name_cache_unlock();
 649
 650                 vnode_ref(vp);
 651
 652                 error = checkdirs(vp, ctx);
 653                 if (error != 0)  {
 654                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 655                         goto out4;
 656                 }
 657                 /*
 658                  * there is no cleanup code here so I have made it void
 659                  * we need to revisit this
 660                  */
 661                 (void)VFS_START(mp, 0, ctx);
 662
 663                 mount_list_add(mp);
 664                 lck_rw_done(&mp->mnt_rwlock);
 665                 is_rwlock_locked = FALSE;
 666
 667                 /* Check if this mounted file system supports EAs or named streams. */
 668                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 669                 VFSATTR_INIT(&vfsattr);
 670                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 671                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 672                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 673                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 674                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 675                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 676                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 677                         }
 678 #if NAMEDSTREAMS
 679                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 680                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 681                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 682                         }
 683 #endif
 684                         /* Check if this file system supports path from id lookups. */
 685                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 686                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 687                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 688                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 689                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 690                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 691                         }
 692                 }
 693                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 694                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 695                 }
 696                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 697                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 698                 }
 699                 /* increment the operations count */
 700                 OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
 701                 enablequotas(mp, ctx);
 702
 703                 if (device_vnode) {
 704                         device_vnode->v_specflags |= SI_MOUNTEDON;
 705
 706                         /*
 707                          *   cache the IO attributes for the underlying physical media...
 708                          *   an error return indicates the underlying driver doesn't
 709                          *   support all the queries necessary... however, reasonable
 710                          *   defaults will have been set, so no reason to bail or care
 711                          */
 712                         vfs_init_io_attributes(device_vnode, mp);
 713                 }
 714
 715                 /* Now that mount is setup, notify the listeners */
 716                 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 717         } else {
 718                 vnode_lock_spin(vp);
 719                 CLR(vp->v_flag, VMOUNT);
 720                 vnode_unlock(vp);
 721                 mount_list_lock();
 722                 mp->mnt_vtable->vfc_refcount--;
 723                 mount_list_unlock();
 724
 725                 if (device_vnode ) {
 726                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 727                         vnode_rele(device_vnode);
 728                 }
 729                 lck_rw_done(&mp->mnt_rwlock);
 730                 is_rwlock_locked = FALSE;
 731                 mount_lock_destroy(mp);
 732 #if CONFIG_MACF
 733                 mac_mount_label_destroy(mp);
 734 #endif
 735                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 736         }
 737         nameidone(&nd);
 738
 739         /*
 740          * drop I/O count on covered 'vp' and
 741          * on the device vp if there was one
 742          */
 743         if (devpath && devvp)
 744                 vnode_put(devvp);
 745         vnode_put(vp);
 746
 747         return(error);
 748 out4:
 749         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 750         if (device_vnode != NULLVP) {
 751                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 752                        ctx);
 753
 754         }
 755         vnode_lock_spin(vp);
 756         vp->v_mountedhere = (mount_t) 0;
 757         vnode_unlock(vp);
 758         vnode_rele(vp);
 759 out3:
 760         if (devpath && ((uap->flags & MNT_UPDATE) == 0))
 761                 vnode_rele(devvp);
 762 out2:
 763         if (devpath && devvp)
 764                 vnode_put(devvp);
 765 out1:
 766         /* Release mnt_rwlock only when it was taken */
 767         if (is_rwlock_locked == TRUE) {
 768                 lck_rw_done(&mp->mnt_rwlock);
 769         }
 770         if (mntalloc) {
 771 #if CONFIG_MACF
 772                 mac_mount_label_destroy(mp);
 773 #endif
 774                 mount_list_lock();
 775                 vfsp->vfc_refcount--;
 776                 mount_list_unlock();
 777                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 778         }
 779         vnode_put(vp);
 780         nameidone(&nd);
 781
 782         return(error);
 783 }
 784
 785 void
 786 enablequotas(struct mount *mp, vfs_context_t ctx)
 787 {
 788         struct nameidata qnd;
 789         int type;
 790         char qfpath[MAXPATHLEN];
 791         const char *qfname = QUOTAFILENAME;
 792         const char *qfopsname = QUOTAOPSNAME;
 793         const char *qfextension[] = INITQFNAMES;
 794
 795         /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
 796         if ((strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 )
 797                 && (strncmp( mp->mnt_vfsstat.f_fstypename, "ufs", sizeof("ufs")) != 0))
 798           return;
 799
 800         /*
 801          * Enable filesystem disk quotas if necessary.
 802          * We ignore errors as this should not interfere with final mount
 803          */
 804         for (type=0; type < MAXQUOTAS; type++) {
 805                 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
 806                 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), ctx);
 807                 if (namei(&qnd) != 0)
 808                         continue;           /* option file to trigger quotas is not present */
 809                 vnode_put(qnd.ni_vp);
 810                 nameidone(&qnd);
 811                 snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
 812
 813                 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
 814         }
 815         return;
 816 }
 817
 818
/*
 * proc_iterate() callback used by checkdirs().
 *
 * If process 'p' has cdrp->olddp as its current directory and/or its root
 * directory, replace that directory with cdrp->newdp, taking a usecount
 * reference on the new vnode and releasing the one on the old.
 *
 * 'arg' points at a struct cdirargs supplying olddp and newdp.
 * Always returns PROC_RETURNED.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
        struct cdirargs * cdrp = (struct cdirargs * )arg;
        vnode_t olddp = cdrp->olddp;
        vnode_t newdp = cdrp->newdp;
        struct filedesc *fdp;
        vnode_t tvp;
        vnode_t fdp_cvp;
        vnode_t fdp_rvp;
        int cdir_changed = 0;
        int rdir_changed = 0;

        /*
         * XXX Also needs to iterate each thread in the process to see if it
         * XXX is using a per-thread current working directory, and, if so,
         * XXX update that as well.
         */

        /* Snapshot the current and root directory pointers under the fd lock. */
        proc_fdlock(p);
        fdp = p->p_fd;
        if (fdp == (struct filedesc *)0) {
                /* No file descriptor table: nothing to update for this process. */
                proc_fdunlock(p);
                return(PROC_RETURNED);
        }
        fdp_cvp = fdp->fd_cdir;
        fdp_rvp = fdp->fd_rdir;
        proc_fdunlock(p);

        /*
         * The reference juggling below runs without the fd lock held.
         * NOTE(review): tvp is re-read from fdp rather than taken from the
         * snapshot; if the process changes directory concurrently this may
         * release a vnode other than olddp -- confirm this is safe.
         */
        if (fdp_cvp == olddp) {
                vnode_ref(newdp);
                tvp = fdp->fd_cdir;
                fdp_cvp = newdp;
                cdir_changed = 1;
                vnode_rele(tvp);
        }
        if (fdp_rvp == olddp) {
                vnode_ref(newdp);
                tvp = fdp->fd_rdir;
                fdp_rvp = newdp;
                rdir_changed = 1;
                vnode_rele(tvp);
        }
        /*
         * Publish the result under the fd lock.  Both fields are written back
         * even when only one of them changed (the other gets its snapshot).
         */
        if (cdir_changed || rdir_changed) {
                proc_fdlock(p);
                fdp->fd_cdir = fdp_cvp;
                fdp->fd_rdir = fdp_rvp;
                proc_fdunlock(p);
        }
        return(PROC_RETURNED);
}
 870
 871
 872
 873 /*
 874  * Scan all active processes to see if any of them have a current
 875  * or root directory onto which the new filesystem has just been
 876  * mounted. If so, replace them with the new mount point.
 877  */
 878 static int
 879 checkdirs(vnode_t olddp, vfs_context_t ctx)
 880 {
 881         vnode_t newdp;
 882         vnode_t tvp;
 883         int err;
 884         struct cdirargs cdr;
 885         struct uthread * uth = get_bsdthread_info(current_thread());
 886
 887         if (olddp->v_usecount == 1)
 888                 return(0);
 889         if (uth != (struct uthread *)0)
 890                 uth->uu_notrigger = 1;
 891         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
 892         if (uth != (struct uthread *)0)
 893                 uth->uu_notrigger = 0;
 894
 895         if (err != 0) {
 896 #if DIAGNOSTIC
 897                 panic("mount: lost mount: error %d", err);
 898 #endif
 899                 return(err);
 900         }
 901
 902         cdr.olddp = olddp;
 903         cdr.newdp = newdp;
 904         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
 905         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
 906
 907         if (rootvnode == olddp) {
 908                 vnode_ref(newdp);
 909                 tvp = rootvnode;
 910                 rootvnode = newdp;
 911                 vnode_rele(tvp);
 912         }
 913
 914         vnode_put(newdp);
 915         return(0);
 916 }
 917
 918 /*
 919  * Unmount a file system.
 920  *
 921  * Note: unmount takes a path to the vnode mounted on as argument,
 922  * not special file (as before).
 923  */
 924 /* ARGSUSED */
 925 int
 926 unmount(__unused proc_t p, struct unmount_args *uap, __unused register_t *retval)
 927 {
 928         vnode_t vp;
 929         struct mount *mp;
 930         int error;
 931         struct nameidata nd;
 932         vfs_context_t ctx = vfs_context_current();
 933
 934         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
 935                 UIO_USERSPACE, uap->path, ctx);
 936         error = namei(&nd);
 937         if (error)
 938                 return (error);
 939         vp = nd.ni_vp;
 940         mp = vp->v_mount;
 941         nameidone(&nd);
 942
 943 #if CONFIG_MACF
 944         error = mac_mount_check_umount(ctx, mp);
 945         if (error != 0) {
 946                 vnode_put(vp);
 947                 return (error);
 948         }
 949 #endif
 950         /*
 951          * Must be the root of the filesystem
 952          */
 953         if ((vp->v_flag & VROOT) == 0) {
 954                 vnode_put(vp);
 955                 return (EINVAL);
 956         }
 957         mount_ref(mp, 0);
 958         vnode_put(vp);
 959         /* safedounmount consumes the mount ref */
 960         return (safedounmount(mp, uap->flags, ctx));
 961 }
 962
 963 int
 964 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
 965 {
 966         mount_t mp;
 967
 968         mp = mount_list_lookupby_fsid(fsid, 0, 1);
 969         if (mp == (mount_t)0) {
 970                 return(ENOENT);
 971         }
 972         mount_ref(mp, 0);
 973         mount_iterdrop(mp);
 974         /* safedounmount consumes the mount ref */
 975         return(safedounmount(mp, flags, ctx));
 976 }
 977
 978
 979 /*
 980  * The mount struct comes with a mount ref which will be consumed.
 981  * Do the actual file system unmount, prevent some common foot shooting.
 982  */
 983 int
 984 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
 985 {
 986         int error;
 987         proc_t p = vfs_context_proc(ctx);
 988
 989         /*
 990          * Only root, or the user that did the original mount is
 991          * permitted to unmount this filesystem.
 992          */
 993         if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
 994             (error = suser(kauth_cred_get(), &p->p_acflag)))
 995                 goto out;
 996
 997         /*
 998          * Don't allow unmounting the root file system.
 999          */
1000         if (mp->mnt_flag & MNT_ROOTFS) {
1001                 error = EBUSY; /* the root is always busy */
1002                 goto out;
1003         }
1004
1005         return (dounmount(mp, flags, 1, ctx));
1006
1007 out:
1008         mount_drop(mp, 0);
1009         return(error);
1010 }
1011
/*
 * Do the actual file system unmount.
 *
 * Parameters:  mp              mount to tear down
 *              flags           unmount flags; MNT_FORCE requests a forced
 *                              unmount and is passed on to VFS_UNMOUNT()
 *              withref         non-zero when the caller holds a mount ref
 *                              on mp that this routine must drop
 *              ctx             caller's VFS context
 *
 * Returns:     0               Success; mp has been torn down and, unless
 *                              cross references remain, freed
 *              EBUSY           Another unmount of mp was already in progress
 *      VFS_SYNC:???            (non-forced unmount only)
 *      vflush:???              (non-forced unmount only)
 *      VFS_UNMOUNT:???
 *
 * On every error return other than EBUSY the unmount-in-progress flags set
 * here are cleared again so the mount remains usable.
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
        vnode_t coveredvp = (vnode_t)0;
        int error;
        int needwakeup = 0;
        int forcedunmount = 0;
        int lflags = 0;

        if (flags & MNT_FORCE)
                forcedunmount = 1;
        mount_lock(mp);
        /* XXX post jaguar fix LK_DRAIN - then clean this up */
        if ((flags & MNT_FORCE)) {
                mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
                mp->mnt_lflag |= MNT_LFORCE;
        }
        if (mp->mnt_lflag & MNT_LUNMOUNT) {
                /*
                 * Someone else is already unmounting mp: ask to be woken,
                 * give up our ref (if any) and sleep.  PDROP means the
                 * mount lock is not re-acquired when msleep returns.
                 */
                mp->mnt_lflag |= MNT_LWAIT;
                if(withref != 0)
                        mount_drop(mp, 1);
                msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
                /*
                 * The prior unmount attempt has probably succeeded.
                 * Do not dereference mp here - returning EBUSY is safest.
                 */
                return (EBUSY);
        }
        /* We are now the thread performing the unmount. */
        mp->mnt_kern_flag |= MNTK_UNMOUNT;
        mp->mnt_lflag |= MNT_LUNMOUNT;
        mp->mnt_flag &=~ MNT_ASYNC;
        /*
         * anyone currently in the fast path that
         * trips over the cached rootvp will be
         * dumped out and forced into the slow path
         * to regenerate a new cached value
         */
        mp->mnt_realrootvp = NULLVP;
        mount_unlock(mp);

        /*
         * taking the name_cache_lock exclusively will
         * ensure that everyone is out of the fast path who
         * might be trying to use a now stale copy of
         * vp->v_mountedhere->mnt_realrootvp
         * bumping mount_generation causes the cached values
         * to be invalidated
         */
        name_cache_lock();
        mount_generation++;
        name_cache_unlock();


        lck_rw_lock_exclusive(&mp->mnt_rwlock);
        /* The caller's ref is dropped once mnt_rwlock is held exclusively. */
        if (withref != 0)
                mount_drop(mp, 0);
#if CONFIG_FSE
        fsevent_unmount(mp);  /* has to come first! */
#endif
        error = 0;
        if (forcedunmount == 0) {
                ubc_umount(mp); /* release cached vnodes */
                if ((mp->mnt_flag & MNT_RDONLY) == 0) {
                        error = VFS_SYNC(mp, MNT_WAIT, ctx);
                        if (error) {
                                /* Back out; 'out' expects the mount lock held. */
                                mount_lock(mp);
                                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                                mp->mnt_lflag &= ~MNT_LFORCE;
                                goto out;
                        }
                }
        }

        if (forcedunmount)
                lflags |= FORCECLOSE;
        /* A vflush failure only aborts a non-forced unmount. */
        error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
        if ((forcedunmount == 0) && error) {
                mount_lock(mp);
                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                mp->mnt_lflag &= ~MNT_LFORCE;
                goto out;
        }

        /* make sure no one is in a mount iteration or lookup */
        mount_iterdrain(mp);

        error = VFS_UNMOUNT(mp, flags, ctx);
        if (error) {
                /* The file system refused: undo the drain and back out. */
                mount_iterreset(mp);
                mount_lock(mp);
                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                mp->mnt_lflag &= ~MNT_LFORCE;
                goto out;
        }

        /*
         * increment the operations count
         * (error is necessarily zero here; the failure case left via 'out')
         */
        if (!error)
                OSAddAtomic(1, (SInt32 *)&vfs_nummntops);

        /* Close and release the backing device vnode, if there is one. */
        if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
                mp->mnt_devvp->v_specflags &= ~SI_MOUNTEDON;
                VNOP_CLOSE(mp->mnt_devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
                       ctx);
                vnode_rele(mp->mnt_devvp);
        }
        /* mnt_rwlock is released around the mount list removal, then retaken. */
        lck_rw_done(&mp->mnt_rwlock);
        mount_list_remove(mp);
        lck_rw_lock_exclusive(&mp->mnt_rwlock);

        /* mark the mount point hook in the vp but not drop the ref yet */
        if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
                        vnode_getwithref(coveredvp);
                        vnode_lock_spin(coveredvp);
                        coveredvp->v_mountedhere = (struct mount *)0;
                        vnode_unlock(coveredvp);
                        vnode_put(coveredvp);
        }

        mount_list_lock();
        mp->mnt_vtable->vfc_refcount--;
        mount_list_unlock();

        cache_purgevfs(mp);     /* remove cache entries for this file sys */
        vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
        mount_lock(mp);
        mp->mnt_lflag |= MNT_LDEAD;

        if (mp->mnt_lflag & MNT_LWAIT) {
                /*
                 * do the wakeup here
                 * in case we block in mount_refdrain
                 * which will drop the mount lock
                 * and allow anyone blocked in vfs_busy
                 * to wakeup and see the LDEAD state
                 */
                mp->mnt_lflag &= ~MNT_LWAIT;
                wakeup((caddr_t)mp);
        }
        mount_refdrain(mp);
out:
        /*
         * Common exit.  Both the success path and every 'goto out' arrive
         * here after taking the mount lock and with mnt_rwlock held.
         */
        if (mp->mnt_lflag & MNT_LWAIT) {
                mp->mnt_lflag &= ~MNT_LWAIT;
                needwakeup = 1;
        }
        mount_unlock(mp);
        lck_rw_done(&mp->mnt_rwlock);

        /* Wake waiters only after the locks have been released. */
        if (needwakeup)
                wakeup((caddr_t)mp);
        if (!error) {
                if ((coveredvp != NULLVP)) {
                        /* Now drop the usecount the mount held on the covered vnode. */
                        vnode_getwithref(coveredvp);
                        vnode_rele(coveredvp);
                        vnode_lock_spin(coveredvp);
                        if(mp->mnt_crossref == 0) {
                                /* No cross references remain: free the mount now. */
                                vnode_unlock(coveredvp);
                                mount_lock_destroy(mp);
#if CONFIG_MACF
                                mac_mount_label_destroy(mp);
#endif
                                FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
                        }  else {
                                /*
                                 * Cross references remain; flag the covered
                                 * vnode so mount_dropcrossref() frees mp when
                                 * the last one goes away.
                                 */
                                coveredvp->v_lflag |= VL_MOUNTDEAD;
                                vnode_unlock(coveredvp);
                        }
                        vnode_put(coveredvp);
                } else if (mp->mnt_flag & MNT_ROOTFS) {
                                /* The root file system covers no vnode. */
                                mount_lock_destroy(mp);
#if CONFIG_MACF
                                mac_mount_label_destroy(mp);
#endif
                                FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
                } else
                        panic("dounmount: no coveredvp");
        }
        return (error);
}
1195
1196 void
1197 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1198 {
1199                 vnode_lock(dp);
1200                 mp->mnt_crossref--;
1201                 if (mp->mnt_crossref < 0)
1202                         panic("mount cross refs -ve");
1203                 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1204                         dp->v_lflag &= ~VL_MOUNTDEAD;
1205                         if (need_put)
1206                                 vnode_put_locked(dp);
1207                         vnode_unlock(dp);
1208                         mount_lock_destroy(mp);
1209 #if CONFIG_MACF
1210                         mac_mount_label_destroy(mp);
1211 #endif
1212                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1213                         return;
1214                 }
1215                 if (need_put)
1216                         vnode_put_locked(dp);
1217                 vnode_unlock(dp);
1218 }
1219
1220
1221 /*
1222  * Sync each mounted filesystem.
1223  */
#if DIAGNOSTIC
/* When non-zero, sync() calls vfs_bufstats() after syncing (DIAGNOSTIC kernels only). */
int syncprt = 0;
/* Debug control entry that exposes syncprt under the name "syncprt". */
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif

/* When non-zero, sync() calls vm_countdirtypages() and prints the VP page counters. */
int print_vmpage_stat=0;
1230
1231 static int
1232 sync_callback(mount_t mp, __unused void * arg)
1233 {
1234         int asyncflag;
1235
1236         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1237                         asyncflag = mp->mnt_flag & MNT_ASYNC;
1238                         mp->mnt_flag &= ~MNT_ASYNC;
1239                         VFS_SYNC(mp, MNT_NOWAIT, vfs_context_current());
1240                         if (asyncflag)
1241                                 mp->mnt_flag |= MNT_ASYNC;
1242         }
1243         return(VFS_RETURNED);
1244 }
1245
1246
1247 extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean;
1248 extern unsigned int dp_pgins, dp_pgouts;
1249
1250 /* ARGSUSED */
1251 int
1252 sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *retval)
1253 {
1254
1255         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1256         {
1257         if(print_vmpage_stat) {
1258                 vm_countdirtypages();
1259                 printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein,
1260                         dp_pgins, dp_pgouts);
1261         }
1262         }
1263 #if DIAGNOSTIC
1264         if (syncprt)
1265                 vfs_bufstats();
1266 #endif /* DIAGNOSTIC */
1267         return (0);
1268 }
1269
1270 /*
1271  * Change filesystem quotas.
1272  */
1273 #if QUOTA
1274 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, register_t *retval);
1275
1276 int
1277 quotactl(proc_t p, struct quotactl_args *uap, register_t *retval)
1278 {
1279         boolean_t funnel_state;
1280         int error;
1281
1282         funnel_state = thread_funnel_set(kernel_flock, TRUE);
1283         error = quotactl_funneled(p, uap, retval);
1284         thread_funnel_set(kernel_flock, funnel_state);
1285         return(error);
1286 }
1287
1288 static int
1289 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retval)
1290 {
1291         struct mount *mp;
1292         int error, quota_cmd, quota_status;
1293         caddr_t datap;
1294         size_t fnamelen;
1295         struct nameidata nd;
1296         vfs_context_t ctx = vfs_context_current();
1297         struct dqblk my_dqblk;
1298
1299         AUDIT_ARG(uid, uap->uid, 0, 0, 0);
1300         AUDIT_ARG(cmd, uap->cmd);
1301         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1302                 UIO_USERSPACE, uap->path, ctx);
1303         error = namei(&nd);
1304         if (error)
1305                 return (error);
1306         mp = nd.ni_vp->v_mount;
1307         vnode_put(nd.ni_vp);
1308         nameidone(&nd);
1309
1310         /* copyin any data we will need for downstream code */
1311         quota_cmd = uap->cmd >> SUBCMDSHIFT;
1312
1313         switch (quota_cmd) {
1314         case Q_QUOTAON:
1315                 /* uap->arg specifies a file from which to take the quotas */
1316                 fnamelen = MAXPATHLEN;
1317                 datap = kalloc(MAXPATHLEN);
1318                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1319                 break;
1320         case Q_GETQUOTA:
1321                 /* uap->arg is a pointer to a dqblk structure. */
1322                 datap = (caddr_t) &my_dqblk;
1323                 break;
1324         case Q_SETQUOTA:
1325         case Q_SETUSE:
1326                 /* uap->arg is a pointer to a dqblk structure. */
1327                 datap = (caddr_t) &my_dqblk;
1328                 if (proc_is64bit(p)) {
1329                         struct user_dqblk       my_dqblk64;
1330                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1331                         if (error == 0) {
1332                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1333                         }
1334                 }
1335                 else {
1336                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1337                 }
1338                 break;
1339         case Q_QUOTASTAT:
1340                 /* uap->arg is a pointer to an integer */
1341                 datap = (caddr_t) &quota_status;
1342                 break;
1343         default:
1344                 datap = NULL;
1345                 break;
1346         } /* switch */
1347
1348         if (error == 0) {
1349                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1350         }
1351
1352         switch (quota_cmd) {
1353         case Q_QUOTAON:
1354                 if (datap != NULL)
1355                         kfree(datap, MAXPATHLEN);
1356                 break;
1357         case Q_GETQUOTA:
1358                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1359                 if (error == 0) {
1360                         if (proc_is64bit(p)) {
1361                                 struct user_dqblk       my_dqblk64;
1362                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1363                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1364                         }
1365                         else {
1366                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1367                         }
1368                 }
1369                 break;
1370         case Q_QUOTASTAT:
1371                 /* uap->arg is a pointer to an integer */
1372                 if (error == 0) {
1373                         error = copyout(datap, uap->arg, sizeof(quota_status));
1374                 }
1375                 break;
1376         default:
1377                 break;
1378         } /* switch */
1379
1380         return (error);
1381 }
1382 #else
/*
 * quotactl(2) stub for kernels built without QUOTA: every request is
 * rejected with EOPNOTSUPP.
 */
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused register_t *retval)
{
        return (EOPNOTSUPP);
}
1388 #endif /* QUOTA */
1389
1390 /*
1391  * Get filesystem statistics.
1392  *
1393  * Returns:     0                       Success
1394  *      namei:???
1395  *      vfs_update_vfsstat:???
1396  *      munge_statfs:EFAULT
1397  */
1398 /* ARGSUSED */
1399 int
1400 statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval)
1401 {
1402         struct mount *mp;
1403         struct vfsstatfs *sp;
1404         int error;
1405         struct nameidata nd;
1406         vfs_context_t ctx = vfs_context_current();
1407         vnode_t vp;
1408
1409         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1410                 UIO_USERSPACE, uap->path, ctx);
1411         error = namei(&nd);
1412         if (error)
1413                 return (error);
1414         vp = nd.ni_vp;
1415         mp = vp->v_mount;
1416         sp = &mp->mnt_vfsstat;
1417         nameidone(&nd);
1418
1419         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1420         vnode_put(vp);
1421         if (error != 0)
1422                 return (error);
1423
1424         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1425         return (error);
1426 }
1427
1428 /*
1429  * Get filesystem statistics.
1430  */
1431 /* ARGSUSED */
1432 int
1433 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused register_t *retval)
1434 {
1435         vnode_t vp;
1436         struct mount *mp;
1437         struct vfsstatfs *sp;
1438         int error;
1439
1440         AUDIT_ARG(fd, uap->fd);
1441
1442         if ( (error = file_vnode(uap->fd, &vp)) )
1443                 return (error);
1444
1445         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1446
1447         mp = vp->v_mount;
1448         if (!mp) {
1449                 file_drop(uap->fd);
1450                 return (EBADF);
1451         }
1452         sp = &mp->mnt_vfsstat;
1453         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1454                 file_drop(uap->fd);
1455                 return (error);
1456         }
1457         file_drop(uap->fd);
1458
1459         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1460
1461         return (error);
1462 }
1463
1464 /*
1465  * Common routine to handle copying of statfs64 data to user space
1466  */
1467 static int
1468 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1469 {
1470         int error;
1471         struct statfs64 sfs;
1472
1473         bzero(&sfs, sizeof(sfs));
1474
1475         sfs.f_bsize = sfsp->f_bsize;
1476         sfs.f_iosize = (int32_t)sfsp->f_iosize;
1477         sfs.f_blocks = sfsp->f_blocks;
1478         sfs.f_bfree = sfsp->f_bfree;
1479         sfs.f_bavail = sfsp->f_bavail;
1480         sfs.f_files = sfsp->f_files;
1481         sfs.f_ffree = sfsp->f_ffree;
1482         sfs.f_fsid = sfsp->f_fsid;
1483         sfs.f_owner = sfsp->f_owner;
1484         sfs.f_type = mp->mnt_vtable->vfc_typenum;
1485         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1486         sfs.f_fssubtype = sfsp->f_fssubtype;
1487         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1488         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1489         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1490
1491         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1492
1493         return(error);
1494 }
1495
1496 /*
1497  * Get file system statistics in 64-bit mode
1498  */
1499 int
1500 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t *retval)
1501 {
1502         struct mount *mp;
1503         struct vfsstatfs *sp;
1504         int error;
1505         struct nameidata nd;
1506         vfs_context_t ctxp = vfs_context_current();
1507         vnode_t vp;
1508
1509         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1510                 UIO_USERSPACE, uap->path, ctxp);
1511         error = namei(&nd);
1512         if (error)
1513                 return (error);
1514         vp = nd.ni_vp;
1515         mp = vp->v_mount;
1516         sp = &mp->mnt_vfsstat;
1517         nameidone(&nd);
1518
1519         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
1520         vnode_put(vp);
1521         if (error != 0)
1522                 return (error);
1523
1524         error = statfs64_common(mp, sp, uap->buf);
1525
1526         return (error);
1527 }
1528
1529 /*
1530  * Get file system statistics in 64-bit mode
1531  */
1532 int
1533 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused register_t *retval)
1534 {
1535         struct vnode *vp;
1536         struct mount *mp;
1537         struct vfsstatfs *sp;
1538         int error;
1539
1540         AUDIT_ARG(fd, uap->fd);
1541
1542         if ( (error = file_vnode(uap->fd, &vp)) )
1543                 return (error);
1544
1545         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1546
1547         mp = vp->v_mount;
1548         if (!mp) {
1549                 file_drop(uap->fd);
1550                 return (EBADF);
1551         }
1552         sp = &mp->mnt_vfsstat;
1553         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
1554                 file_drop(uap->fd);
1555                 return (error);
1556         }
1557         file_drop(uap->fd);
1558
1559         error = statfs64_common(mp, sp, uap->buf);
1560
1561         return (error);
1562 }
1563
/*
 * State shared between the getfsstat code and getfsstat_callback() while
 * the mounts are being iterated.
 */
struct getfsstat_struct {
        user_addr_t     sfsp;           /* user address for the next statfs record; 0 means "count only" */
        user_addr_t     *mp;            /* cursor over user addresses for MAC labels; NULL if not requested */
        int             count;          /* mounts counted so far (keeps growing past maxcount) */
        int             maxcount;       /* number of statfs records the user buffer can hold */
        int             flags;          /* MNT_WAIT / MNT_NOWAIT from the caller */
        int             error;          /* error recorded by the callback when it stops the iteration */
};
1572
1573
1574 static int
1575 getfsstat_callback(mount_t mp, void * arg)
1576 {
1577
1578         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1579         struct vfsstatfs *sp;
1580         int error, my_size;
1581         vfs_context_t ctx = vfs_context_current();
1582
1583         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1584                 sp = &mp->mnt_vfsstat;
1585                 /*
1586                  * If MNT_NOWAIT is specified, do not refresh the
1587                  * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1588                  */
1589                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1590                         (error = vfs_update_vfsstat(mp, ctx,
1591                             VFS_USER_EVENT))) {
1592                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1593                         return(VFS_RETURNED);
1594                 }
1595
1596                 /*
1597                  * Need to handle LP64 version of struct statfs
1598                  */
1599                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
1600                 if (error) {
1601                         fstp->error = error;
1602                         return(VFS_RETURNED_DONE);
1603                 }
1604                 fstp->sfsp += my_size;
1605
1606                 if (fstp->mp) {
1607                         error = mac_mount_label_get(mp, *fstp->mp);
1608                         if (error) {
1609                                 fstp->error = error;
1610                                 return(VFS_RETURNED_DONE);
1611                         }
1612                         fstp->mp++;
1613                 }
1614         }
1615         fstp->count++;
1616         return(VFS_RETURNED);
1617 }
1618
1619 /*
1620  * Get statistics on all filesystems.
1621  */
1622 int
1623 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
1624 {
1625         struct __mac_getfsstat_args muap;
1626
1627         muap.buf = uap->buf;
1628         muap.bufsize = uap->bufsize;
1629         muap.mac = USER_ADDR_NULL;
1630         muap.macsize = 0;
1631         muap.flags = uap->flags;
1632
1633         return (__mac_getfsstat(p, &muap, retval));
1634 }
1635
1636 int
1637 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
1638 {
1639         user_addr_t sfsp;
1640         user_addr_t *mp;
1641         int count, maxcount;
1642         struct getfsstat_struct fst;
1643
1644         if (IS_64BIT_PROCESS(p)) {
1645                 maxcount = uap->bufsize / sizeof(struct user_statfs);
1646         }
1647         else {
1648                 maxcount = uap->bufsize / sizeof(struct statfs);
1649         }
1650         sfsp = uap->buf;
1651         count = 0;
1652
1653         mp = NULL;
1654
1655 #if CONFIG_MACF
1656         if (uap->mac != USER_ADDR_NULL) {
1657                 u_int32_t *mp0;
1658                 int error;
1659                 int i;
1660
1661                 count = (int)(uap->macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
1662                 if (count != maxcount)
1663                         return (EINVAL);
1664
1665                 /* Copy in the array */
1666                 MALLOC(mp0, u_int32_t *, uap->macsize, M_MACTEMP, M_WAITOK);
1667                 error = copyin(uap->mac, mp0, uap->macsize);
1668                 if (error)
1669                         return (error);
1670
1671                 /* Normalize to an array of user_addr_t */
1672                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
1673                 for (i = 0; i < count; i++) {
1674                         if (IS_64BIT_PROCESS(p))
1675                                 mp[i] = ((user_addr_t *)mp0)[i];
1676                         else
1677                                 mp[i] = (user_addr_t)mp0[i];
1678                 }
1679                 FREE(mp0, M_MACTEMP);
1680         }
1681 #endif
1682
1683
1684         fst.sfsp = sfsp;
1685         fst.mp = mp;
1686         fst.flags = uap->flags;
1687         fst.count = 0;
1688         fst.error = 0;
1689         fst.maxcount = maxcount;
1690
1691
1692         vfs_iterate(0, getfsstat_callback, &fst);
1693
1694         if (mp)
1695                 FREE(mp, M_MACTEMP);
1696
1697         if (fst.error ) {
1698                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1699                 return(fst.error);
1700         }
1701
1702         if (fst.sfsp && fst.count > fst.maxcount)
1703                 *retval = fst.maxcount;
1704         else
1705                 *retval = fst.count;
1706         return (0);
1707 }
1708
1709 static int
1710 getfsstat64_callback(mount_t mp, void * arg)
1711 {
1712         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1713         struct vfsstatfs *sp;
1714         int error;
1715
1716         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1717                 sp = &mp->mnt_vfsstat;
1718                 /*
1719                  * If MNT_NOWAIT is specified, do not refresh the
1720                  * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1721                  */
1722                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1723                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
1724                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1725                         return(VFS_RETURNED);
1726                 }
1727
1728                 error = statfs64_common(mp, sp, fstp->sfsp);
1729                 if (error) {
1730                         fstp->error = error;
1731                         return(VFS_RETURNED_DONE);
1732                 }
1733                 fstp->sfsp += sizeof(struct statfs64);
1734         }
1735         fstp->count++;
1736         return(VFS_RETURNED);
1737 }
1738
1739 /*
1740  * Get statistics on all file systems in 64 bit mode.
1741  */
1742 int
1743 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
1744 {
1745         user_addr_t sfsp;
1746         int count, maxcount;
1747         struct getfsstat_struct fst;
1748
1749         maxcount = uap->bufsize / sizeof(struct statfs64);
1750
1751         sfsp = uap->buf;
1752         count = 0;
1753
1754         fst.sfsp = sfsp;
1755         fst.flags = uap->flags;
1756         fst.count = 0;
1757         fst.error = 0;
1758         fst.maxcount = maxcount;
1759
1760         vfs_iterate(0, getfsstat64_callback, &fst);
1761
1762         if (fst.error ) {
1763                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1764                 return(fst.error);
1765         }
1766
1767         if (fst.sfsp && fst.count > fst.maxcount)
1768                 *retval = fst.maxcount;
1769         else
1770                 *retval = fst.count;
1771
1772         return (0);
1773 }
1774
1775 #if COMPAT_GETFSSTAT
1776 ogetfsstat(proc_t p, struct getfsstat_args *uap, register_t *retval)
1777 {
1778         return (ENOTSUP);
1779 }
1780 #endif
1781
1782 /*
1783  * Change current working directory to a given file descriptor.
1784  */
1785 /* ARGSUSED */
1786 static int
1787 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1788 {
1789         struct filedesc *fdp = p->p_fd;
1790         vnode_t vp;
1791         vnode_t tdp;
1792         vnode_t tvp;
1793         struct mount *mp;
1794         int error;
1795         vfs_context_t ctx = vfs_context_current();
1796
1797         if (per_thread && uap->fd == -1) {
1798                 /*
1799                  * Switching back from per-thread to per process CWD; verify we
1800                  * in fact have one before proceeding.  The only success case
1801                  * for this code path is to return 0 preemptively after zapping
1802                  * the thread structure contents.
1803                  */
1804                 thread_t th = vfs_context_thread(ctx);
1805                 if (th) {
1806                         uthread_t uth = get_bsdthread_info(th);
1807                         tvp = uth->uu_cdir;
1808                         uth->uu_cdir = NULLVP;
1809                         if (tvp != NULLVP) {
1810                                 vnode_rele(tvp);
1811                                 return (0);
1812                         }
1813                 }
1814                 return (EBADF);
1815         }
1816
1817         if ( (error = file_vnode(uap->fd, &vp)) )
1818                 return(error);
1819         if ( (error = vnode_getwithref(vp)) ) {
1820                 file_drop(uap->fd);
1821                 return(error);
1822         }
1823
1824         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1825
1826         if (vp->v_type != VDIR) {
1827                 error = ENOTDIR;
1828                 goto out;
1829         }
1830
1831 #if CONFIG_MACF
1832         error = mac_vnode_check_chdir(ctx, vp);
1833         if (error)
1834                 goto out;
1835 #endif
1836         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
1837         if (error)
1838                 goto out;
1839
1840         while (!error && (mp = vp->v_mountedhere) != NULL) {
1841                 if (vfs_busy(mp, LK_NOWAIT)) {
1842                         error = EACCES;
1843                         goto out;
1844                 }
1845                 error = VFS_ROOT(mp, &tdp, ctx);
1846                 vfs_unbusy(mp);
1847                 if (error)
1848                         break;
1849                 vnode_put(vp);
1850                 vp = tdp;
1851         }
1852         if (error)
1853                 goto out;
1854         if ( (error = vnode_ref(vp)) )
1855                 goto out;
1856         vnode_put(vp);
1857
1858         if (per_thread) {
1859                 thread_t th = vfs_context_thread(ctx);
1860                 if (th) {
1861                         uthread_t uth = get_bsdthread_info(th);
1862                         tvp = uth->uu_cdir;
1863                         uth->uu_cdir = vp;
1864                         OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1865                 } else {
1866                         vnode_rele(vp);
1867                         return (ENOENT);
1868                 }
1869         } else {
1870                 proc_fdlock(p);
1871                 tvp = fdp->fd_cdir;
1872                 fdp->fd_cdir = vp;
1873                 proc_fdunlock(p);
1874         }
1875
1876         if (tvp)
1877                 vnode_rele(tvp);
1878         file_drop(uap->fd);
1879
1880         return (0);
1881 out:
1882         vnode_put(vp);
1883         file_drop(uap->fd);
1884
1885         return(error);
1886 }
1887
1888 int
1889 fchdir(proc_t p, struct fchdir_args *uap, __unused register_t *retval)
1890 {
1891         return common_fchdir(p, uap, 0);
1892 }
1893
1894 int
1895 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused register_t *retval)
1896 {
1897         return common_fchdir(p, (void *)uap, 1);
1898 }
1899
1900 /*
1901  * Change current working directory (``.'').
1902  *
1903  * Returns:     0                       Success
1904  *      change_dir:ENOTDIR
1905  *      change_dir:???
1906  *      vnode_ref:ENOENT                No such file or directory
1907  */
1908 /* ARGSUSED */
1909 static int
1910 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1911 {
1912         struct filedesc *fdp = p->p_fd;
1913         int error;
1914         struct nameidata nd;
1915         vnode_t tvp;
1916         vfs_context_t ctx = vfs_context_current();
1917
1918         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1919                 UIO_USERSPACE, uap->path, ctx);
1920         error = change_dir(&nd, ctx);
1921         if (error)
1922                 return (error);
1923         if ( (error = vnode_ref(nd.ni_vp)) ) {
1924                 vnode_put(nd.ni_vp);
1925                 return (error);
1926         }
1927         /*
1928          * drop the iocount we picked up in change_dir
1929          */
1930         vnode_put(nd.ni_vp);
1931
1932         if (per_thread) {
1933                 thread_t th = vfs_context_thread(ctx);
1934                 if (th) {
1935                         uthread_t uth = get_bsdthread_info(th);
1936                         tvp = uth->uu_cdir;
1937                         uth->uu_cdir = nd.ni_vp;
1938                         OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1939                 } else {
1940                         vnode_rele(nd.ni_vp);
1941                         return (ENOENT);
1942                 }
1943         } else {
1944                 proc_fdlock(p);
1945                 tvp = fdp->fd_cdir;
1946                 fdp->fd_cdir = nd.ni_vp;
1947                 proc_fdunlock(p);
1948         }
1949
1950         if (tvp)
1951                 vnode_rele(tvp);
1952
1953         return (0);
1954 }
1955
1956 int
1957 chdir(proc_t p, struct chdir_args *uap, __unused register_t *retval)
1958 {
1959         return common_chdir(p, (void *)uap, 0);
1960 }
1961
1962 int
1963 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t *retval)
1964 {
1965         return common_chdir(p, (void *)uap, 1);
1966 }
1967
1968
1969 /*
1970  * Change notion of root (``/'') directory.
1971  */
1972 /* ARGSUSED */
1973 int
1974 chroot(proc_t p, struct chroot_args *uap, __unused register_t *retval)
1975 {
1976         struct filedesc *fdp = p->p_fd;
1977         int error;
1978         struct nameidata nd;
1979         vnode_t tvp;
1980         vfs_context_t ctx = vfs_context_current();
1981
1982         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1983                 return (error);
1984
1985         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1986                 UIO_USERSPACE, uap->path, ctx);
1987         error = change_dir(&nd, ctx);
1988         if (error)
1989                 return (error);
1990
1991 #if CONFIG_MACF
1992         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
1993             &nd.ni_cnd);
1994         if (error) {
1995                 vnode_put(nd.ni_vp);
1996                 return (error);
1997         }
1998 #endif
1999
2000         if ( (error = vnode_ref(nd.ni_vp)) ) {
2001                 vnode_put(nd.ni_vp);
2002                 return (error);
2003         }
2004         vnode_put(nd.ni_vp);
2005
2006         proc_fdlock(p);
2007         tvp = fdp->fd_rdir;
2008         fdp->fd_rdir = nd.ni_vp;
2009         fdp->fd_flags |= FD_CHROOT;
2010         proc_fdunlock(p);
2011
2012         if (tvp != NULL)
2013                 vnode_rele(tvp);
2014
2015         return (0);
2016 }
2017
2018 /*
2019  * Common routine for chroot and chdir.
2020  *
2021  * Returns:     0                       Success
2022  *              ENOTDIR                 Not a directory
2023  *              namei:???               [anything namei can return]
2024  *              vnode_authorize:???     [anything vnode_authorize can return]
2025  */
2026 static int
2027 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2028 {
2029         vnode_t vp;
2030         int error;
2031
2032         if ((error = namei(ndp)))
2033                 return (error);
2034         nameidone(ndp);
2035         vp = ndp->ni_vp;
2036
2037         if (vp->v_type != VDIR) {
2038                 vnode_put(vp);
2039                 return (ENOTDIR);
2040         }
2041
2042 #if CONFIG_MACF
2043         error = mac_vnode_check_chdir(ctx, vp);
2044         if (error) {
2045                 vnode_put(vp);
2046                 return (error);
2047         }
2048 #endif
2049
2050         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2051         if (error) {
2052                 vnode_put(vp);
2053                 return (error);
2054         }
2055
2056         return (error);
2057 }
2058
2059 /*
2060  * Check permissions, allocate an open file structure,
2061  * and call the device open routine if any.
2062  *
2063  * Returns:     0                       Success
2064  *              EINVAL
2065  *              EINTR
2066  *      falloc:ENFILE
2067  *      falloc:EMFILE
2068  *      falloc:ENOMEM
2069  *      vn_open_auth:???
2070  *      dupfdopen:???
2071  *      VNOP_ADVLOCK:???
2072  *      vnode_setsize:???
2073  */
2074 #warning XXX implement uid, gid
2075 int
2076 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, register_t *retval)
2077 {
2078         proc_t p = vfs_context_proc(ctx);
2079         uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2080         struct filedesc *fdp = p->p_fd;
2081         struct fileproc *fp;
2082         vnode_t vp;
2083         int flags, oflags;
2084         struct fileproc *nfp;
2085         int type, indx, error;
2086         struct flock lf;
2087         int no_controlling_tty = 0;
2088         int deny_controlling_tty = 0;
2089         struct session *sessp = SESSION_NULL;
2090         struct vfs_context context = *vfs_context_current();    /* local copy */
2091
2092         oflags = uflags;
2093
2094         if ((oflags & O_ACCMODE) == O_ACCMODE)
2095                 return(EINVAL);
2096         flags = FFLAGS(uflags);
2097
2098         AUDIT_ARG(fflags, oflags);
2099         AUDIT_ARG(mode, vap->va_mode);
2100
2101         if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2102                 return (error);
2103         }
2104         fp = nfp;
2105         uu->uu_dupfd = -indx - 1;
2106
2107         if (!(p->p_flag & P_CONTROLT)) {
2108                 sessp = proc_session(p);
2109                 no_controlling_tty = 1;
2110                 /*
2111                  * If conditions would warrant getting a controlling tty if
2112                  * the device being opened is a tty (see ttyopen in tty.c),
2113                  * but the open flags deny it, set a flag in the session to
2114                  * prevent it.
2115                  */
2116                 if (SESS_LEADER(p, sessp) &&
2117                     sessp->s_ttyvp == NULL &&
2118                     (flags & O_NOCTTY)) {
2119                         session_lock(sessp);
2120                         sessp->s_flags |= S_NOCTTY;
2121                         session_unlock(sessp);
2122                         deny_controlling_tty = 1;
2123                 }
2124         }
2125
2126         if ((error = vn_open_auth(ndp, &flags, vap))) {
2127                 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
2128                         if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2129                                 fp_drop(p, indx, NULL, 0);
2130                                 *retval = indx;
2131                                 if (deny_controlling_tty) {
2132                                         session_lock(sessp);
2133                                         sessp->s_flags &= ~S_NOCTTY;
2134                                         session_unlock(sessp);
2135                                 }
2136                                 if (sessp != SESSION_NULL)
2137                                         session_rele(sessp);
2138                                 return (0);
2139                         }
2140                 }
2141                 if (error == ERESTART)
2142                         error = EINTR;
2143                 fp_free(p, indx, fp);
2144
2145                 if (deny_controlling_tty) {
2146                         session_lock(sessp);
2147                         sessp->s_flags &= ~S_NOCTTY;
2148                         session_unlock(sessp);
2149                 }
2150                 if (sessp != SESSION_NULL)
2151                         session_rele(sessp);
2152                 return (error);
2153         }
2154         uu->uu_dupfd = 0;
2155         vp = ndp->ni_vp;
2156
2157         fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
2158         fp->f_fglob->fg_type = DTYPE_VNODE;
2159         fp->f_fglob->fg_ops = &vnops;
2160         fp->f_fglob->fg_data = (caddr_t)vp;
2161
2162         if (flags & (O_EXLOCK | O_SHLOCK)) {
2163                 lf.l_whence = SEEK_SET;
2164                 lf.l_start = 0;
2165                 lf.l_len = 0;
2166                 if (flags & O_EXLOCK)
2167                         lf.l_type = F_WRLCK;
2168                 else
2169                         lf.l_type = F_RDLCK;
2170                 type = F_FLOCK;
2171                 if ((flags & FNONBLOCK) == 0)
2172                         type |= F_WAIT;
2173 #if CONFIG_MACF
2174                 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2175                     F_SETLK, &lf);
2176                 if (error)
2177                         goto bad;
2178 #endif
2179                 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2180                         goto bad;
2181                 fp->f_fglob->fg_flag |= FHASLOCK;
2182         }
2183
2184         /* try to truncate by setting the size attribute */
2185         if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2186                 goto bad;
2187
2188         /*
2189          * If the open flags denied the acquisition of a controlling tty,
2190          * clear the flag in the session structure that prevented the lower
2191          * level code from assigning one.
2192          */
2193         if (deny_controlling_tty) {
2194                 session_lock(sessp);
2195                 sessp->s_flags &= ~S_NOCTTY;
2196                 session_unlock(sessp);
2197         }
2198
2199         /*
2200          * If a controlling tty was set by the tty line discipline, then we
2201          * want to set the vp of the tty into the session structure.  We have
2202          * a race here because we can't get to the vp for the tp in ttyopen,
2203          * because it's not passed as a parameter in the open path.
2204          */
2205         if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2206                 vnode_t ttyvp;
2207                 vnode_ref(vp);
2208                 session_lock(sessp);
2209                 ttyvp = sessp->s_ttyvp;
2210                 sessp->s_ttyvp = vp;
2211                 sessp->s_ttyvid = vnode_vid(vp);
2212                 session_unlock(sessp);
2213                 if (ttyvp != NULLVP)
2214                         vnode_rele(ttyvp);
2215         }
2216
2217         vnode_put(vp);
2218
2219         proc_fdlock(p);
2220         procfdtbl_releasefd(p, indx, NULL);
2221         fp_drop(p, indx, fp, 1);
2222         proc_fdunlock(p);
2223
2224         *retval = indx;
2225
2226         if (sessp != SESSION_NULL)
2227                 session_rele(sessp);
2228         return (0);
2229 bad:
2230         if (deny_controlling_tty) {
2231                 session_lock(sessp);
2232                 sessp->s_flags &= ~S_NOCTTY;
2233                 session_unlock(sessp);
2234         }
2235         if (sessp != SESSION_NULL)
2236                 session_rele(sessp);
2237
2238         /* Modify local copy (to not damage thread copy) */
2239         context.vc_ucred = fp->f_fglob->fg_cred;
2240
2241         vn_close(vp, fp->f_fglob->fg_flag, &context);
2242         vnode_put(vp);
2243         fp_free(p, indx, fp);
2244
2245         return (error);
2246
2247 }
2248
2249 /*
2250  * An open system call using an extended argument list compared to the regular
2251  * system call 'open'.
2252  *
2253  * Parameters:  p                       Process requesting the open
2254  *              uap                     User argument descriptor (see below)
2255  *              retval                  Pointer to an area to receive the
2256  *                                      return calue from the system call
2257  *
2258  * Indirect:    uap->path               Path to open (same as 'open')
2259  *              uap->flags              Flags to open (same as 'open'
2260  *              uap->uid                UID to set, if creating
2261  *              uap->gid                GID to set, if creating
2262  *              uap->mode               File mode, if creating (same as 'open')
2263  *              uap->xsecurity          ACL to set, if creating
2264  *
2265  * Returns:     0                       Success
2266  *              !0                      errno value
2267  *
2268  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2269  *
2270  * XXX:         We should enummerate the possible errno values here, and where
2271  *              in the code they originated.
2272  */
2273 int
2274 open_extended(proc_t p, struct open_extended_args *uap, register_t *retval)
2275 {
2276         struct filedesc *fdp = p->p_fd;
2277         int ciferror;
2278         kauth_filesec_t xsecdst;
2279         struct vnode_attr va;
2280         struct nameidata nd;
2281         int cmode;
2282
2283         xsecdst = NULL;
2284         if ((uap->xsecurity != USER_ADDR_NULL) &&
2285             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2286                 return ciferror;
2287
2288         VATTR_INIT(&va);
2289         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2290         VATTR_SET(&va, va_mode, cmode);
2291         if (uap->uid != KAUTH_UID_NONE)
2292                 VATTR_SET(&va, va_uid, uap->uid);
2293         if (uap->gid != KAUTH_GID_NONE)
2294                 VATTR_SET(&va, va_gid, uap->gid);
2295         if (xsecdst != NULL)
2296                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2297
2298         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2299
2300         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2301         if (xsecdst != NULL)
2302                 kauth_filesec_free(xsecdst);
2303
2304         return ciferror;
2305 }
2306
2307 int
2308 open(proc_t p, struct open_args *uap, register_t *retval)
2309 {
2310         __pthread_testcancel(1);
2311         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2312 }
2313
2314
2315 int
2316 open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval)
2317 {
2318         struct filedesc *fdp = p->p_fd;
2319         struct vnode_attr va;
2320         struct nameidata nd;
2321         int cmode;
2322
2323         VATTR_INIT(&va);
2324         /* Mask off all but regular access permissions */
2325         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2326         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2327
2328         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2329
2330         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2331 }
2332
2333
2334 /*
2335  * Create a special file.
2336  */
2337 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2338
2339 int
2340 mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval)
2341 {
2342         struct vnode_attr va;
2343         vfs_context_t ctx = vfs_context_current();
2344         int error;
2345         int whiteout = 0;
2346         struct nameidata nd;
2347         vnode_t vp, dvp;
2348
2349         VATTR_INIT(&va);
2350         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2351         VATTR_SET(&va, va_rdev, uap->dev);
2352
2353         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2354         if ((uap->mode & S_IFMT) == S_IFIFO)
2355                 return(mkfifo1(ctx, uap->path, &va));
2356
2357         AUDIT_ARG(mode, uap->mode);
2358         AUDIT_ARG(dev, uap->dev);
2359
2360         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2361                 return (error);
2362         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2363                 UIO_USERSPACE, uap->path, ctx);
2364         error = namei(&nd);
2365         if (error)
2366                 return (error);
2367         dvp = nd.ni_dvp;
2368         vp = nd.ni_vp;
2369
2370         if (vp != NULL) {
2371                 error = EEXIST;
2372                 goto out;
2373         }
2374
2375         switch (uap->mode & S_IFMT) {
2376         case S_IFMT:    /* used by badsect to flag bad sectors */
2377                 VATTR_SET(&va, va_type, VBAD);
2378                 break;
2379         case S_IFCHR:
2380                 VATTR_SET(&va, va_type, VCHR);
2381                 break;
2382         case S_IFBLK:
2383                 VATTR_SET(&va, va_type, VBLK);
2384                 break;
2385         case S_IFWHT:
2386                 whiteout = 1;
2387                 break;
2388         default:
2389                 error = EINVAL;
2390                 goto out;
2391         }
2392
2393 #if CONFIG_MACF
2394         if (!whiteout) {
2395                 error = mac_vnode_check_create(ctx,
2396                     nd.ni_dvp, &nd.ni_cnd, &va);
2397                 if (error)
2398                         goto out;
2399         }
2400 #endif
2401
2402         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2403                 goto out;
2404
2405         if (whiteout) {
2406                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2407         } else {
2408                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2409         }
2410         if (error)
2411                 goto out;
2412
2413         if (vp) {
2414                 int     update_flags = 0;
2415
2416                 // Make sure the name & parent pointers are hooked up
2417                 if (vp->v_name == NULL)
2418                         update_flags |= VNODE_UPDATE_NAME;
2419                 if (vp->v_parent == NULLVP)
2420                         update_flags |= VNODE_UPDATE_PARENT;
2421
2422                 if (update_flags)
2423                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2424
2425 #if CONFIG_FSE
2426                 add_fsevent(FSE_CREATE_FILE, ctx,
2427                     FSE_ARG_VNODE, vp,
2428                     FSE_ARG_DONE);
2429 #endif
2430         }
2431
2432 out:
2433         /*
2434          * nameidone has to happen before we vnode_put(dvp)
2435          * since it may need to release the fs_nodelock on the dvp
2436          */
2437         nameidone(&nd);
2438
2439         if (vp)
2440                 vnode_put(vp);
2441         vnode_put(dvp);
2442
2443         return (error);
2444 }
2445
2446 /*
2447  * Create a named pipe.
2448  *
2449  * Returns:     0                       Success
2450  *              EEXIST
2451  *      namei:???
2452  *      vnode_authorize:???
2453  *      vn_create:???
2454  */
2455 static int
2456 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
2457 {
2458         vnode_t vp, dvp;
2459         int error;
2460         struct nameidata nd;
2461
2462         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2463                 UIO_USERSPACE, upath, ctx);
2464         error = namei(&nd);
2465         if (error)
2466                 return (error);
2467         dvp = nd.ni_dvp;
2468         vp = nd.ni_vp;
2469
2470         /* check that this is a new file and authorize addition */
2471         if (vp != NULL) {
2472                 error = EEXIST;
2473                 goto out;
2474         }
2475         VATTR_SET(vap, va_type, VFIFO);
2476
2477 #if CONFIG_MACF
2478         error = mac_vnode_check_create(ctx, nd.ni_dvp,
2479             &nd.ni_cnd, vap);
2480         if (error)
2481                 goto out;
2482 #endif
2483
2484
2485         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2486                 goto out;
2487
2488
2489         error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
2490 out:
2491         /*
2492          * nameidone has to happen before we vnode_put(dvp)
2493          * since it may need to release the fs_nodelock on the dvp
2494          */
2495         nameidone(&nd);
2496
2497         if (vp)
2498                 vnode_put(vp);
2499         vnode_put(dvp);
2500
2501         return error;
2502 }
2503
2504
2505 /*
2506  * A mkfifo system call using an extended argument list compared to the regular
2507  * system call 'mkfifo'.
2508  *
2509  * Parameters:  p                       Process requesting the open
2510  *              uap                     User argument descriptor (see below)
2511  *              retval                  (Ignored)
2512  *
2513  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
2514  *              uap->uid                UID to set
2515  *              uap->gid                GID to set
2516  *              uap->mode               File mode to set (same as 'mkfifo')
2517  *              uap->xsecurity          ACL to set, if creating
2518  *
2519  * Returns:     0                       Success
2520  *              !0                      errno value
2521  *
2522  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2523  *
2524  * XXX:         We should enummerate the possible errno values here, and where
2525  *              in the code they originated.
2526  */
2527 int
2528 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t *retval)
2529 {
2530         int ciferror;
2531         kauth_filesec_t xsecdst;
2532         struct vnode_attr va;
2533
2534         xsecdst = KAUTH_FILESEC_NONE;
2535         if (uap->xsecurity != USER_ADDR_NULL) {
2536                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
2537                         return ciferror;
2538         }
2539
2540         VATTR_INIT(&va);
2541         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2542         if (uap->uid != KAUTH_UID_NONE)
2543                 VATTR_SET(&va, va_uid, uap->uid);
2544         if (uap->gid != KAUTH_GID_NONE)
2545                 VATTR_SET(&va, va_gid, uap->gid);
2546         if (xsecdst != KAUTH_FILESEC_NONE)
2547                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2548
2549         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
2550
2551         if (xsecdst != KAUTH_FILESEC_NONE)
2552                 kauth_filesec_free(xsecdst);
2553         return ciferror;
2554 }
2555
2556 /* ARGSUSED */
2557 int
2558 mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval)
2559 {
2560         struct vnode_attr va;
2561
2562         VATTR_INIT(&va);
2563         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2564
2565         return(mkfifo1(vfs_context_current(), uap->path, &va));
2566 }
2567
2568 /*
2569  * Make a hard file link.
2570  *
2571  * Returns:     0                       Success
2572  *              EPERM
2573  *              EEXIST
2574  *              EXDEV
2575  *      namei:???
2576  *      vnode_authorize:???
2577  *      VNOP_LINK:???
2578  */
2579 /* ARGSUSED */
2580 int
2581 link(__unused proc_t p, struct link_args *uap, __unused register_t *retval)
2582 {
2583         vnode_t vp, dvp, lvp;
2584         struct nameidata nd;
2585         vfs_context_t ctx = vfs_context_current();
2586         int error;
2587         fse_info finfo;
2588         int need_event, has_listeners;
2589         char *target_path = NULL;
2590
2591         vp = dvp = lvp = NULLVP;
2592
2593         /* look up the object we are linking to */
2594         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2595                 UIO_USERSPACE, uap->path, ctx);
2596         error = namei(&nd);
2597         if (error)
2598                 return (error);
2599         vp = nd.ni_vp;
2600
2601         nameidone(&nd);
2602
2603         /*
2604          * Normally, linking to directories is not supported.
2605          * However, some file systems may have limited support.
2606          */
2607         if (vp->v_type == VDIR) {
2608                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2609                         error = EPERM;   /* POSIX */
2610                         goto out;
2611                 }
2612                 /* Linking to a directory requires ownership. */
2613                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
2614                         struct vnode_attr dva;
2615
2616                         VATTR_INIT(&dva);
2617                         VATTR_WANTED(&dva, va_uid);
2618                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
2619                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
2620                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
2621                                 error = EACCES;
2622                                 goto out;
2623                         }
2624                 }
2625         }
2626
2627         /* lookup the target node */
2628         nd.ni_cnd.cn_nameiop = CREATE;
2629         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
2630         nd.ni_dirp = uap->link;
2631         error = namei(&nd);
2632         if (error != 0)
2633                 goto out;
2634         dvp = nd.ni_dvp;
2635         lvp = nd.ni_vp;
2636
2637 #if CONFIG_MACF
2638         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
2639                 goto out2;
2640 #endif
2641
2642         /* or to anything that kauth doesn't want us to (eg. immutable items) */
2643         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
2644                 goto out2;
2645
2646         /* target node must not exist */
2647         if (lvp != NULLVP) {
2648                 error = EEXIST;
2649                 goto out2;
2650         }
2651         /* cannot link across mountpoints */
2652         if (vnode_mount(vp) != vnode_mount(dvp)) {
2653                 error = EXDEV;
2654                 goto out2;
2655         }
2656
2657         /* authorize creation of the target note */
2658         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2659                 goto out2;
2660
2661         /* and finally make the link */
2662         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
2663         if (error)
2664                 goto out2;
2665
2666 #if CONFIG_FSE
2667         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2668 #else
2669         need_event = 0;
2670 #endif
2671         has_listeners = kauth_authorize_fileop_has_listeners();
2672
2673         if (need_event || has_listeners) {
2674                 char *link_to_path = NULL;
2675                 int len, link_name_len;
2676
2677                 /* build the path to the new link file */
2678                 GET_PATH(target_path);
2679                 if (target_path == NULL) {
2680                         error = ENOMEM;
2681                         goto out2;
2682                 }
2683
2684                 len = MAXPATHLEN;
2685                 vn_getpath(dvp, target_path, &len);
2686                 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2687                     target_path[len-1] = '/';
2688                     strlcpy(&target_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2689                     len += nd.ni_cnd.cn_namelen;
2690                 }
2691
2692                 if (has_listeners) {
2693                         /* build the path to file we are linking to */
2694                         GET_PATH(link_to_path);
2695                         if (link_to_path == NULL) {
2696                                 error = ENOMEM;
2697                                 goto out2;
2698                         }
2699
2700                         link_name_len = MAXPATHLEN;
2701                         vn_getpath(vp, link_to_path, &link_name_len);
2702
2703                         /*
2704                          * Call out to allow 3rd party notification of rename.
2705                          * Ignore result of kauth_authorize_fileop call.
2706                          */
2707                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
2708                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
2709                         if (link_to_path != NULL) {
2710                                 RELEASE_PATH(link_to_path);
2711                         }
2712                 }
2713 #if CONFIG_FSE
2714                 if (need_event) {
2715                         /* construct fsevent */
2716                         if (get_fse_info(vp, &finfo, ctx) == 0) {
2717                                 // build the path to the destination of the link
2718                                 add_fsevent(FSE_CREATE_FILE, ctx,
2719                                             FSE_ARG_STRING, len, target_path,
2720                                             FSE_ARG_FINFO, &finfo,
2721                                             FSE_ARG_DONE);
2722                         }
2723                 }
2724 #endif
2725         }
2726 out2:
2727         /*
2728          * nameidone has to happen before we vnode_put(dvp)
2729          * since it may need to release the fs_nodelock on the dvp
2730          */
2731         nameidone(&nd);
2732         if (target_path != NULL) {
2733                 RELEASE_PATH(target_path);
2734         }
2735 out:
2736         if (lvp)
2737                 vnode_put(lvp);
2738         if (dvp)
2739                 vnode_put(dvp);
2740         vnode_put(vp);
2741         return (error);
2742 }
2743
2744 /*
2745  * Make a symbolic link.
2746  *
2747  * We could add support for ACLs here too...
2748  */
2749 /* ARGSUSED */
2750 int
2751 symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval)
2752 {
2753         struct vnode_attr va;
2754         char *path;
2755         int error;
2756         struct nameidata nd;
2757         vfs_context_t ctx = vfs_context_current();
2758         vnode_t vp, dvp;
2759         size_t dummy=0;
2760
2761         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2762         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
2763         if (error)
2764                 goto out;
2765         AUDIT_ARG(text, path);  /* This is the link string */
2766
2767         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2768                 UIO_USERSPACE, uap->link, ctx);
2769         error = namei(&nd);
2770         if (error)
2771                 goto out;
2772         dvp = nd.ni_dvp;
2773         vp = nd.ni_vp;
2774
2775         VATTR_INIT(&va);
2776         VATTR_SET(&va, va_type, VLNK);
2777         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
2778 #if CONFIG_MACF
2779         error = mac_vnode_check_create(ctx,
2780                         dvp, &nd.ni_cnd, &va);
2781 #endif
2782         if (error != 0) {
2783             goto skipit;
2784         }
2785
2786         if (vp != NULL) {
2787             error = EEXIST;
2788             goto skipit;
2789         }
2790
2791         /* authorize */
2792         if (error == 0)
2793                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
2794         /* get default ownership, etc. */
2795         if (error == 0)
2796                 error = vnode_authattr_new(dvp, &va, 0, ctx);
2797         if (error == 0)
2798                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
2799
2800         /* do fallback attribute handling */
2801         if (error == 0)
2802                 error = vnode_setattr_fallback(vp, &va, ctx);
2803
2804         if (error == 0) {
2805                 int     update_flags = 0;
2806
2807                 if (vp == NULL) {
2808                         nd.ni_cnd.cn_nameiop = LOOKUP;
2809                         nd.ni_cnd.cn_flags = 0;
2810                         error = namei(&nd);
2811                         vp = nd.ni_vp;
2812
2813                         if (vp == NULL)
2814                                 goto skipit;
2815                 }
2816
2817 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
2818                 /* call out to allow 3rd party notification of rename.
2819                  * Ignore result of kauth_authorize_fileop call.
2820                  */
2821                 if (kauth_authorize_fileop_has_listeners() &&
2822                     namei(&nd) == 0) {
2823                         char *new_link_path = NULL;
2824                         int             len;
2825
2826                         /* build the path to the new link file */
2827                         new_link_path = get_pathbuff();
2828                         len = MAXPATHLEN;
2829                         vn_getpath(dvp, new_link_path, &len);
2830                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2831                                 new_link_path[len - 1] = '/';
2832                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2833                         }
2834
2835                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2836                                            (uintptr_t)path, (uintptr_t)new_link_path);
2837                         if (new_link_path != NULL)
2838                                 release_pathbuff(new_link_path);
2839                 }
2840 #endif
2841                 // Make sure the name & parent pointers are hooked up
2842                 if (vp->v_name == NULL)
2843                         update_flags |= VNODE_UPDATE_NAME;
2844                 if (vp->v_parent == NULLVP)
2845                         update_flags |= VNODE_UPDATE_PARENT;
2846
2847                 if (update_flags)
2848                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2849
2850 #if CONFIG_FSE
2851                 add_fsevent(FSE_CREATE_FILE, ctx,
2852                             FSE_ARG_VNODE, vp,
2853                             FSE_ARG_DONE);
2854 #endif
2855         }
2856
2857 skipit:
2858         /*
2859          * nameidone has to happen before we vnode_put(dvp)
2860          * since it may need to release the fs_nodelock on the dvp
2861          */
2862         nameidone(&nd);
2863
2864         if (vp)
2865                 vnode_put(vp);
2866         vnode_put(dvp);
2867 out:
2868         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2869
2870         return (error);
2871 }
2872
2873 /*
2874  * Delete a whiteout from the filesystem.
2875  */
2876 /* ARGSUSED */
2877 #warning XXX authorization not implmented for whiteouts
2878 int
2879 undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retval)
2880 {
2881         int error;
2882         struct nameidata nd;
2883         vfs_context_t ctx = vfs_context_current();
2884         vnode_t vp, dvp;
2885
2886         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
2887                 UIO_USERSPACE, uap->path, ctx);
2888         error = namei(&nd);
2889         if (error)
2890                 return (error);
2891         dvp = nd.ni_dvp;
2892         vp = nd.ni_vp;
2893
2894         if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2895                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
2896         } else
2897                 error = EEXIST;
2898
2899         /*
2900          * nameidone has to happen before we vnode_put(dvp)
2901          * since it may need to release the fs_nodelock on the dvp
2902          */
2903         nameidone(&nd);
2904
2905         if (vp)
2906                 vnode_put(vp);
2907         vnode_put(dvp);
2908
2909         return (error);
2910 }
2911
2912 /*
2913  * Delete a name from the filesystem.
2914  */
2915 /* ARGSUSED */
2916 int
2917 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
2918 {
2919         vnode_t vp, dvp;
2920         int error;
2921         struct componentname *cnp;
2922         char  *path = NULL;
2923         int  len;
2924         fse_info  finfo;
2925         int flags = 0;
2926         int need_event = 0;
2927         int has_listeners = 0;
2928
2929         ndp->ni_cnd.cn_flags |= LOCKPARENT;
2930         cnp = &ndp->ni_cnd;
2931
2932         error = namei(ndp);
2933         if (error)
2934                 return (error);
2935         dvp = ndp->ni_dvp;
2936         vp = ndp->ni_vp;
2937
2938         /* With Carbon delete semantics, busy files cannot be deleted */
2939         if (nodelbusy) {
2940                 flags |= VNODE_REMOVE_NODELETEBUSY;
2941         }
2942
2943         /*
2944          * Normally, unlinking of directories is not supported.
2945          * However, some file systems may have limited support.
2946          */
2947         if ((vp->v_type == VDIR) &&
2948             !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2949                 error = EPERM;  /* POSIX */
2950         }
2951
2952         /*
2953          * The root of a mounted filesystem cannot be deleted.
2954          */
2955         if (vp->v_flag & VROOT) {
2956                 error = EBUSY;
2957         }
2958         if (error)
2959                 goto out;
2960
2961
2962         /* authorize the delete operation */
2963 #if CONFIG_MACF
2964         if (!error)
2965                 error = mac_vnode_check_unlink(ctx,
2966                     dvp, vp, cnp);
2967 #endif /* MAC */
2968         if (!error)
2969                 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
2970         if (error)
2971                 goto out;
2972
2973 #if CONFIG_FSE
2974         need_event = need_fsevent(FSE_DELETE, dvp);
2975         if (need_event) {
2976                 if ((vp->v_flag & VISHARDLINK) == 0) {
2977                         get_fse_info(vp, &finfo, ctx);
2978                 }
2979         }
2980 #endif
2981         has_listeners = kauth_authorize_fileop_has_listeners();
2982         if (need_event || has_listeners) {
2983                 GET_PATH(path);
2984                 if (path == NULL) {
2985                         error = ENOMEM;
2986                         goto out;
2987                 }
2988                 len = MAXPATHLEN;
2989                 vn_getpath(vp, path, &len);
2990         }
2991
2992 #if NAMEDRSRCFORK
2993         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2994                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
2995         else
2996 #endif
2997                 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
2998
2999         /*
3000          * Call out to allow 3rd party notification of delete.
3001          * Ignore result of kauth_authorize_fileop call.
3002          */
3003         if (!error) {
3004                 if (has_listeners) {
3005                         kauth_authorize_fileop(vfs_context_ucred(ctx),
3006                                 KAUTH_FILEOP_DELETE,
3007                                 (uintptr_t)vp,
3008                                 (uintptr_t)path);
3009                 }
3010
3011                 if (vp->v_flag & VISHARDLINK) {
3012                     //
3013                     // if a hardlink gets deleted we want to blow away the
3014                     // v_parent link because the path that got us to this
3015                     // instance of the link is no longer valid.  this will
3016                     // force the next call to get the path to ask the file
3017                     // system instead of just following the v_parent link.
3018                     //
3019                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3020                 }
3021
3022 #if CONFIG_FSE
3023                 if (need_event) {
3024                         if (vp->v_flag & VISHARDLINK) {
3025                                 get_fse_info(vp, &finfo, ctx);
3026                         }
3027                         add_fsevent(FSE_DELETE, ctx,
3028                                                 FSE_ARG_STRING, len, path,
3029                                                 FSE_ARG_FINFO, &finfo,
3030                                                 FSE_ARG_DONE);
3031                 }
3032 #endif
3033         }
3034         if (path != NULL)
3035                 RELEASE_PATH(path);
3036
3037         /*
3038          * nameidone has to happen before we vnode_put(dvp)
3039          * since it may need to release the fs_nodelock on the dvp
3040          */
3041 out:
3042         nameidone(ndp);
3043         vnode_put(dvp);
3044         vnode_put(vp);
3045         return (error);
3046 }
3047
3048 /*
3049  * Delete a name from the filesystem using POSIX semantics.
3050  */
3051 int
3052 unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval)
3053 {
3054         struct nameidata nd;
3055         vfs_context_t ctx = vfs_context_current();
3056
3057         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3058         return unlink1(ctx, &nd, 0);
3059 }
3060
3061 /*
3062  * Delete a name from the filesystem using Carbon semantics.
3063  */
3064 int
3065 delete(__unused proc_t p, struct delete_args *uap, __unused register_t *retval)
3066 {
3067         struct nameidata nd;
3068         vfs_context_t ctx = vfs_context_current();
3069
3070         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3071         return unlink1(ctx, &nd, 1);
3072 }
3073
3074 /*
3075  * Reposition read/write file offset.
3076  */
3077 int
3078 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3079 {
3080         struct fileproc *fp;
3081         vnode_t vp;
3082         struct vfs_context *ctx;
3083         off_t offset = uap->offset, file_size;
3084         int error;
3085
3086         if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3087                 if (error == ENOTSUP)
3088                         return (ESPIPE);
3089                 return (error);
3090         }
3091         if (vnode_isfifo(vp)) {
3092                 file_drop(uap->fd);
3093                 return(ESPIPE);
3094         }
3095
3096
3097         ctx = vfs_context_current();
3098 #if CONFIG_MACF
3099         if (uap->whence == L_INCR && uap->offset == 0)
3100                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3101                     fp->f_fglob);
3102         else
3103                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3104                     fp->f_fglob);
3105         if (error) {
3106                 file_drop(uap->fd);
3107                 return (error);
3108         }
3109 #endif
3110         if ( (error = vnode_getwithref(vp)) ) {
3111                 file_drop(uap->fd);
3112                 return(error);
3113         }
3114
3115         switch (uap->whence) {
3116         case L_INCR:
3117                 offset += fp->f_fglob->fg_offset;
3118                 break;
3119         case L_XTND:
3120                 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3121                         break;
3122                 offset += file_size;
3123                 break;
3124         case L_SET:
3125                 break;
3126         default:
3127                 error = EINVAL;
3128         }
3129         if (error == 0) {
3130                 if (uap->offset > 0 && offset < 0) {
3131                         /* Incremented/relative move past max size */
3132                         error = EOVERFLOW;
3133                 } else {
3134                         /*
3135                          * Allow negative offsets on character devices, per
3136                          * POSIX 1003.1-2001.  Most likely for writing disk
3137                          * labels.
3138                          */
3139                         if (offset < 0 && vp->v_type != VCHR) {
3140                                 /* Decremented/relative move before start */
3141                                 error = EINVAL;
3142                         } else {
3143                                 /* Success */
3144                                 fp->f_fglob->fg_offset = offset;
3145                                 *retval = fp->f_fglob->fg_offset;
3146                         }
3147                 }
3148         }
3149         (void)vnode_put(vp);
3150         file_drop(uap->fd);
3151         return (error);
3152 }
3153
3154
3155 /*
3156  * Check access permissions.
3157  *
3158  * Returns:     0                       Success
3159  *              vnode_authorize:???
3160  */
3161 static int
3162 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3163 {
3164         kauth_action_t action;
3165         int error;
3166
3167         /*
3168          * If just the regular access bits, convert them to something
3169          * that vnode_authorize will understand.
3170          */
3171         if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3172                 action = 0;
3173                 if (uflags & R_OK)
3174                         action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
3175                 if (uflags & W_OK) {
3176                         if (vnode_isdir(vp)) {
3177                                 action |= KAUTH_VNODE_ADD_FILE |
3178                                     KAUTH_VNODE_ADD_SUBDIRECTORY;
3179                                 /* might want delete rights here too */
3180                         } else {
3181                                 action |= KAUTH_VNODE_WRITE_DATA;
3182                         }
3183                 }
3184                 if (uflags & X_OK) {
3185                         if (vnode_isdir(vp)) {
3186                                 action |= KAUTH_VNODE_SEARCH;
3187                         } else {
3188                                 action |= KAUTH_VNODE_EXECUTE;
3189                         }
3190                 }
3191         } else {
3192                 /* take advantage of definition of uflags */
3193                 action = uflags >> 8;
3194         }
3195
3196 #if CONFIG_MACF
3197         error = mac_vnode_check_access(ctx, vp, uflags);
3198         if (error)
3199                 return (error);
3200 #endif /* MAC */
3201
3202         /* action == 0 means only check for existence */
3203         if (action != 0) {
3204                 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3205         } else {
3206                 error = 0;
3207         }
3208
3209         return(error);
3210 }
3211
3212
3213
3214 /*
3215  * access_extended
3216  *
3217  * Description: uap->entries                    Pointer to argument descriptor
3218  *              uap->size                       Size of the area pointed to by
3219  *                                              the descriptor
3220  *              uap->results                    Pointer to the results array
3221  *
3222  * Returns:     0                       Success
3223  *              ENOMEM                  Insufficient memory
3224  *              EINVAL                  Invalid arguments
3225  *              namei:EFAULT            Bad address
3226  *              namei:ENAMETOOLONG      Filename too long
3227  *              namei:ENOENT            No such file or directory
3228  *              namei:ELOOP             Too many levels of symbolic links
3229  *              namei:EBADF             Bad file descriptor
3230  *              namei:ENOTDIR           Not a directory
3231  *              namei:???
3232  *              access1:
3233  *
3234  * Implicit returns:
3235  *              uap->results            Array contents modified
3236  *
3237  * Notes:       The uap->entries are structured as an arbitrary length array
3238  *              of accessx descriptors, followed by one or more NUL terminated
3239  *              strings
3240  *
3241  *                      struct accessx_descriptor[0]
3242  *                      ...
3243  *                      struct accessx_descriptor[n]
3244  *                      char name_data[0];
3245  *
3246  *              We determine the entry count by walking the buffer containing
3247  *              the uap->entries argument descriptor.  For each descriptor we
3248  *              see, the valid values for the offset ad_name_offset will be
3249  *              in the byte range:
3250  *
3251  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
3252  *                                              to
3253  *                              [ uap->entries + uap->size - 2 ]
3254  *
3255  *              since we must have at least one string, and the string must
3256  *              be at least one character plus the NUL terminator in length.
3257  *
3258  * XXX:         Need to support the check-as uid argument
3259  */
3260 int
3261 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused register_t *retval)
3262 {
3263         struct accessx_descriptor *input = NULL;
3264         errno_t *result = NULL;
3265         errno_t error = 0;
3266         int wantdelete = 0;
3267         unsigned int desc_max, desc_actual, i, j;
3268         struct vfs_context context;
3269         struct nameidata nd;
3270         int niopts;
3271         vnode_t vp = NULL;
3272         vnode_t dvp = NULL;
3273 #define ACCESSX_MAX_DESCR_ON_STACK 10
3274         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3275
3276         context.vc_ucred = NULL;
3277
3278         /*
3279          * Validate parameters; if valid, copy the descriptor array and string
3280          * arguments into local memory.  Before proceeding, the following
3281          * conditions must have been met:
3282          *
3283          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3284          * o    There must be sufficient room in the request for at least one
3285          *      descriptor and a one yte NUL terminated string.
3286          * o    The allocation of local storage must not fail.
3287          */
3288         if (uap->size > ACCESSX_MAX_TABLESIZE)
3289                 return(ENOMEM);
3290         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3291                 return(EINVAL);
3292         if (uap->size <= sizeof (stack_input)) {
3293                 input = stack_input;
3294         } else {
3295         MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3296         if (input == NULL) {
3297                 error = ENOMEM;
3298                 goto out;
3299         }
3300         }
3301         error = copyin(uap->entries, input, uap->size);
3302         if (error)
3303                 goto out;
3304
3305         /*
3306          * Force NUL termination of the copyin buffer to avoid nami() running
3307          * off the end.  If the caller passes us bogus data, they may get a
3308          * bogus result.
3309          */
3310         ((char *)input)[uap->size - 1] = 0;
3311
3312         /*
3313          * Access is defined as checking against the process' real identity,
3314          * even if operations are checking the effective identity.  This
3315          * requires that we use a local vfs context.
3316          */
3317         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3318         context.vc_thread = current_thread();
3319
3320         /*
3321          * Find out how many entries we have, so we can allocate the result
3322          * array by walking the list and adjusting the count downward by the
3323          * earliest string offset we see.
3324          */
3325         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
3326         desc_actual = desc_max;
3327         for (i = 0; i < desc_actual; i++) {
3328                 /*
3329                  * Take the offset to the name string for this entry and
3330                  * convert to an input array index, which would be one off
3331                  * the end of the array if this entry was the lowest-addressed
3332                  * name string.
3333                  */
3334                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
3335
3336                 /*
3337                  * An offset greater than the max allowable offset is an error.
3338                  * It is also an error for any valid entry to point
3339                  * to a location prior to the end of the current entry, if
3340                  * it's not a reference to the string of the previous entry.
3341                  */
3342                 if (j > desc_max || (j != 0 && j <= i)) {
3343                         error = EINVAL;
3344                         goto out;
3345                 }
3346
3347                 /*
3348                  * An offset of 0 means use the previous descriptor's offset;
3349                  * this is used to chain multiple requests for the same file
3350                  * to avoid multiple lookups.
3351                  */
3352                 if (j == 0) {
3353                         /* This is not valid for the first entry */
3354                         if (i == 0) {
3355                                 error = EINVAL;
3356                                 goto out;
3357                         }
3358                         continue;
3359                 }
3360
3361                 /*
3362                  * If the offset of the string for this descriptor is before
3363                  * what we believe is the current actual last descriptor,
3364                  * then we need to adjust our estimate downward; this permits
3365                  * the string table following the last descriptor to be out
3366                  * of order relative to the descriptor list.
3367                  */
3368                 if (j < desc_actual)
3369                         desc_actual = j;
3370         }
3371
3372         /*
3373          * We limit the actual number of descriptors we are willing to process
3374          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
3375          * requested does not exceed this limit,
3376          */
3377         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
3378                 error = ENOMEM;
3379                 goto out;
3380         }
3381         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
3382         if (result == NULL) {
3383                 error = ENOMEM;
3384                 goto out;
3385         }
3386
3387         /*
3388          * Do the work by iterating over the descriptor entries we know to
3389          * at least appear to contain valid data.
3390          */
3391         error = 0;
3392         for (i = 0; i < desc_actual; i++) {
3393                 /*
3394                  * If the ad_name_offset is 0, then we use the previous
3395                  * results to make the check; otherwise, we are looking up
3396                  * a new file name.
3397                  */
3398                 if (input[i].ad_name_offset != 0) {
3399                         /* discard old vnodes */
3400                         if (vp) {
3401                                 vnode_put(vp);
3402                                 vp = NULL;
3403                         }
3404                         if (dvp) {
3405                                 vnode_put(dvp);
3406                                 dvp = NULL;
3407                         }
3408
3409                         /*
3410                          * Scan forward in the descriptor list to see if we
3411                          * need the parent vnode.  We will need it if we are
3412                          * deleting, since we must have rights  to remove
3413                          * entries in the parent directory, as well as the
3414                          * rights to delete the object itself.
3415                          */
3416                         wantdelete = input[i].ad_flags & _DELETE_OK;
3417                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
3418                                 if (input[j].ad_flags & _DELETE_OK)
3419                                         wantdelete = 1;
3420
3421                         niopts = FOLLOW | AUDITVNPATH1;
3422
3423                         /* need parent for vnode_authorize for deletion test */
3424                         if (wantdelete)
3425                                 niopts |= WANTPARENT;
3426
3427                         /* do the lookup */
3428                         NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
3429                         error = namei(&nd);
3430                         if (!error) {
3431                                 vp = nd.ni_vp;
3432                                 if (wantdelete)
3433                                         dvp = nd.ni_dvp;
3434                         }
3435                         nameidone(&nd);
3436                 }
3437
3438                 /*
3439                  * Handle lookup errors.
3440                  */
3441                 switch(error) {
3442                 case ENOENT:
3443                 case EACCES:
3444                 case EPERM:
3445                 case ENOTDIR:
3446                         result[i] = error;
3447                         break;
3448                 case 0:
3449                         /* run this access check */
3450                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
3451                         break;
3452                 default:
3453                         /* fatal lookup error */
3454
3455                         goto out;
3456                 }
3457         }
3458
3459         /* copy out results */
3460         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
3461
3462 out:
3463         if (input && input != stack_input)
3464                 FREE(input, M_TEMP);
3465         if (result)
3466                 FREE(result, M_TEMP);
3467         if (vp)
3468                 vnode_put(vp);
3469         if (dvp)
3470                 vnode_put(dvp);
3471         if (IS_VALID_CRED(context.vc_ucred))
3472                 kauth_cred_unref(&context.vc_ucred);
3473         return(error);
3474 }
3475
3476
3477 /*
3478  * Returns:     0                       Success
3479  *              namei:EFAULT            Bad address
3480  *              namei:ENAMETOOLONG      Filename too long
3481  *              namei:ENOENT            No such file or directory
3482  *              namei:ELOOP             Too many levels of symbolic links
3483  *              namei:EBADF             Bad file descriptor
3484  *              namei:ENOTDIR           Not a directory
3485  *              namei:???
3486  *              access1:
3487  */
3488 int
3489 access(__unused proc_t p, struct access_args *uap, __unused register_t *retval)
3490 {
3491         int error;
3492         struct nameidata nd;
3493         int niopts;
3494         struct vfs_context context;
3495
3496         /*
3497          * Access is defined as checking against the process'
3498          * real identity, even if operations are checking the
3499          * effective identity.  So we need to tweak the credential
3500          * in the context.
3501          */
3502         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3503         context.vc_thread = current_thread();
3504
3505         niopts = FOLLOW | AUDITVNPATH1;
3506         /* need parent for vnode_authorize for deletion test */
3507         if (uap->flags & _DELETE_OK)
3508                 niopts |= WANTPARENT;
3509         NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
3510
3511 #if NAMEDRSRCFORK
3512         /* access(F_OK) calls are allowed for resource forks. */
3513         if (uap->flags == F_OK)
3514                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3515 #endif
3516         error = namei(&nd);
3517         if (error)
3518                 goto out;
3519
3520         error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
3521
3522         vnode_put(nd.ni_vp);
3523         if (uap->flags & _DELETE_OK)
3524                 vnode_put(nd.ni_dvp);
3525         nameidone(&nd);
3526
3527 out:
3528         kauth_cred_unref(&context.vc_ucred);
3529         return(error);
3530 }
3531
3532
3533 /*
3534  * Returns:     0                       Success
3535  *              EFAULT
3536  *      copyout:EFAULT
3537  *      namei:???
3538  *      vn_stat:???
3539  */
3540 static int
3541 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3542 {
3543         struct stat sb;
3544         struct stat64 sb64;
3545         struct user_stat user_sb;
3546         struct user_stat64 user_sb64;
3547         caddr_t sbp;
3548         int error, my_size;
3549         kauth_filesec_t fsec;
3550         size_t xsecurity_bufsize;
3551         void * statptr;
3552
3553 #if NAMEDRSRCFORK
3554         /* stat calls are allowed for resource forks. */
3555         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3556 #endif
3557         error = namei(ndp);
3558         if (error)
3559                 return (error);
3560         fsec = KAUTH_FILESEC_NONE;
3561         if (isstat64 != 0)
3562                 statptr  = (void *)&sb64;
3563         else
3564                 statptr  = (void *)&sb;
3565         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
3566
3567 #if NAMEDRSRCFORK
3568         /* Clean up resource fork shadow file if needed. */
3569         if ((ndp->ni_vp->v_flag & VISNAMEDSTREAM) &&
3570             (ndp->ni_vp->v_parent != NULLVP) &&
3571             !(ndp->ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS)) {
3572                 (void) vnode_relenamedstream(ndp->ni_vp->v_parent, ndp->ni_vp, ctx);
3573         }
3574 #endif
3575         vnode_put(ndp->ni_vp);
3576         nameidone(ndp);
3577
3578         if (error)
3579                 return (error);
3580         /* Zap spare fields */
3581         if (isstat64 != 0) {
3582                 sb64.st_lspare = 0;
3583                 sb64.st_qspare[0] = 0LL;
3584                 sb64.st_qspare[1] = 0LL;
3585                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3586                         munge_stat64(&sb64, &user_sb64);
3587                         my_size = sizeof(user_sb64);
3588                         sbp = (caddr_t)&user_sb64;
3589                 } else {
3590                         my_size = sizeof(sb64);
3591                         sbp = (caddr_t)&sb64;
3592                 }
3593                 /*
3594                  * Check if we raced (post lookup) against the last unlink of a file.
3595                  */
3596                 if ((sb64.st_nlink == 0) && S_ISREG(sb64.st_mode)) {
3597                         sb64.st_nlink = 1;
3598                 }
3599         } else {
3600                 sb.st_lspare = 0;
3601                 sb.st_qspare[0] = 0LL;
3602                 sb.st_qspare[1] = 0LL;
3603                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3604                         munge_stat(&sb, &user_sb);
3605                         my_size = sizeof(user_sb);
3606                         sbp = (caddr_t)&user_sb;
3607                 } else {
3608                         my_size = sizeof(sb);
3609                         sbp = (caddr_t)&sb;
3610                 }
3611
3612                 /*
3613                  * Check if we raced (post lookup) against the last unlink of a file.
3614                  */
3615                 if ((sb.st_nlink == 0) && S_ISREG(sb.st_mode)) {
3616                         sb.st_nlink = 1;
3617                 }
3618         }
3619         if ((error = copyout(sbp, ub, my_size)) != 0)
3620                 goto out;
3621
3622         /* caller wants extended security information? */
3623         if (xsecurity != USER_ADDR_NULL) {
3624
3625                 /* did we get any? */
3626                 if (fsec == KAUTH_FILESEC_NONE) {
3627                         if (susize(xsecurity_size, 0) != 0) {
3628                                 error = EFAULT;
3629                                 goto out;
3630                         }
3631                 } else {
3632                         /* find the user buffer size */
3633                         xsecurity_bufsize = fusize(xsecurity_size);
3634
3635                         /* copy out the actual data size */
3636                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3637                                 error = EFAULT;
3638                                 goto out;
3639                         }
3640
3641                         /* if the caller supplied enough room, copy out to it */
3642                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3643                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3644                 }
3645         }
3646 out:
3647         if (fsec != KAUTH_FILESEC_NONE)
3648                 kauth_filesec_free(fsec);
3649         return (error);
3650 }
3651
3652 /*
3653  * Get file status; this version follows links.
3654  *
3655  * Returns:     0                       Success
3656  *      stat2:???                       [see stat2() in this file]
3657  */
3658 static int
3659 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3660 {
3661         struct nameidata nd;
3662         vfs_context_t ctx = vfs_context_current();
3663
3664         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
3665             UIO_USERSPACE, path, ctx);
3666         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3667 }
3668
3669 int
3670 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused register_t *retval)
3671 {
3672         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3673 }
3674
3675 /*
3676  * Returns:     0                       Success
3677  *      stat1:???                       [see stat1() in this file]
3678  */
3679 int
3680 stat(__unused proc_t p, struct stat_args *uap, __unused register_t *retval)
3681 {
3682         return(stat1(uap->path, uap->ub, 0, 0, 0));
3683 }
3684
3685 int
3686 stat64(__unused proc_t p, struct stat64_args *uap, __unused register_t *retval)
3687 {
3688         return(stat1(uap->path, uap->ub, 0, 0, 1));
3689 }
3690
3691 int
3692 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused register_t *retval)
3693 {
3694         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3695 }
3696 /*
3697  * Get file status; this version does not follow links.
3698  */
3699 static int
3700 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3701 {
3702         struct nameidata nd;
3703         vfs_context_t ctx = vfs_context_current();
3704
3705         NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
3706             UIO_USERSPACE, path, ctx);
3707
3708         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3709 }
3710
3711 int
3712 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused register_t *retval)
3713 {
3714         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3715 }
3716
3717 int
3718 lstat(__unused proc_t p, struct lstat_args *uap, __unused register_t *retval)
3719 {
3720         return(lstat1(uap->path, uap->ub, 0, 0, 0));
3721 }
3722 int
3723 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused register_t *retval)
3724 {
3725         return(lstat1(uap->path, uap->ub, 0, 0, 1));
3726 }
3727
3728 int
3729 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused register_t *retval)
3730 {
3731         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3732 }
3733
3734 /*
3735  * Get configurable pathname variables.
3736  *
3737  * Returns:     0                       Success
3738  *      namei:???
3739  *      vn_pathconf:???
3740  *
3741  * Notes:       Global implementation  constants are intended to be
3742  *              implemented in this function directly; all other constants
3743  *              are per-FS implementation, and therefore must be handled in
3744  *              each respective FS, instead.
3745  *
3746  * XXX We implement some things globally right now that should actually be
3747  * XXX per-FS; we will need to deal with this at some point.
3748  */
3749 /* ARGSUSED */
3750 int
3751 pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval)
3752 {
3753         int error;
3754         struct nameidata nd;
3755         vfs_context_t ctx = vfs_context_current();
3756
3757         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3758                 UIO_USERSPACE, uap->path, ctx);
3759         error = namei(&nd);
3760         if (error)
3761                 return (error);
3762
3763         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
3764
3765         vnode_put(nd.ni_vp);
3766         nameidone(&nd);
3767         return (error);
3768 }
3769
3770 /*
3771  * Return target name of a symbolic link.
3772  */
3773 /* ARGSUSED */
3774 int
3775 readlink(proc_t p, struct readlink_args *uap, register_t *retval)
3776 {
3777         vnode_t vp;
3778         uio_t auio;
3779         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3780         int error;
3781         struct nameidata nd;
3782         vfs_context_t ctx = vfs_context_current();
3783         char uio_buf[ UIO_SIZEOF(1) ];
3784
3785         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
3786                 UIO_USERSPACE, uap->path, ctx);
3787         error = namei(&nd);
3788         if (error)
3789                 return (error);
3790         vp = nd.ni_vp;
3791
3792         nameidone(&nd);
3793
3794         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
3795                                                                   &uio_buf[0], sizeof(uio_buf));
3796         uio_addiov(auio, uap->buf, uap->count);
3797         if (vp->v_type != VLNK)
3798                 error = EINVAL;
3799         else {
3800 #if CONFIG_MACF
3801                 error = mac_vnode_check_readlink(ctx,
3802                     vp);
3803 #endif
3804                 if (error == 0)
3805                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
3806                 if (error == 0)
3807                         error = VNOP_READLINK(vp, auio, ctx);
3808         }
3809         vnode_put(vp);
3810         // LP64todo - fix this
3811         *retval = uap->count - (int)uio_resid(auio);
3812         return (error);
3813 }
3814
3815 /*
3816  * Change file flags.
3817  */
3818 static int
3819 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
3820 {
3821         struct vnode_attr va;
3822         kauth_action_t action;
3823         int error;
3824
3825         VATTR_INIT(&va);
3826         VATTR_SET(&va, va_flags, flags);
3827
3828 #if CONFIG_MACF
3829         error = mac_vnode_check_setflags(ctx, vp, flags);
3830         if (error)
3831                 goto out;
3832 #endif
3833
3834         /* request authorisation, disregard immutability */
3835         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
3836                 goto out;
3837         /*
3838          * Request that the auth layer disregard those file flags it's allowed to when
3839          * authorizing this operation; we need to do this in order to be able to
3840          * clear immutable flags.
3841          */
3842         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
3843                 goto out;
3844         error = vnode_setattr(vp, &va, ctx);
3845
3846         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
3847                 error = ENOTSUP;
3848         }
3849 out:
3850         vnode_put(vp);
3851         return(error);
3852 }
3853
3854 /*
3855  * Change flags of a file given a path name.
3856  */
3857 /* ARGSUSED */
3858 int
3859 chflags(__unused proc_t p, struct chflags_args *uap, __unused register_t *retval)
3860 {
3861         vnode_t vp;
3862         vfs_context_t ctx = vfs_context_current();
3863         int error;
3864         struct nameidata nd;
3865
3866         AUDIT_ARG(fflags, uap->flags);
3867         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3868                 UIO_USERSPACE, uap->path, ctx);
3869         error = namei(&nd);
3870         if (error)
3871                 return (error);
3872         vp = nd.ni_vp;
3873         nameidone(&nd);
3874
3875         error = chflags1(vp, uap->flags, ctx);
3876
3877         return(error);
3878 }
3879
3880 /*
3881  * Change flags of a file given a file descriptor.
3882  */
3883 /* ARGSUSED */
3884 int
3885 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused register_t *retval)
3886 {
3887         vnode_t vp;
3888         int error;
3889
3890         AUDIT_ARG(fd, uap->fd);
3891         AUDIT_ARG(fflags, uap->flags);
3892         if ( (error = file_vnode(uap->fd, &vp)) )
3893                 return (error);
3894
3895         if ((error = vnode_getwithref(vp))) {
3896                 file_drop(uap->fd);
3897                 return(error);
3898         }
3899
3900         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3901
3902         error = chflags1(vp, uap->flags, vfs_context_current());
3903
3904         file_drop(uap->fd);
3905         return (error);
3906 }
3907
3908 /*
3909  * Change security information on a filesystem object.
3910  *
3911  * Returns:     0                       Success
3912  *              EPERM                   Operation not permitted
3913  *              vnode_authattr:???      [anything vnode_authattr can return]
3914  *              vnode_authorize:???     [anything vnode_authorize can return]
3915  *              vnode_setattr:???       [anything vnode_setattr can return]
3916  *
3917  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
3918  *              translated to EPERM before being returned.
3919  */
3920 static int
3921 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
3922 {
3923         kauth_action_t action;
3924         int error;
3925
3926         AUDIT_ARG(mode, (mode_t)vap->va_mode);
3927 #warning XXX audit new args
3928
3929 #if NAMEDSTREAMS
3930         /* chmod calls are not allowed for resource forks. */
3931         if (vp->v_flag & VISNAMEDSTREAM) {
3932                 return (EPERM);
3933         }
3934 #endif
3935
3936 #if CONFIG_MACF
3937         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
3938         if (error)
3939                 return (error);
3940 #endif
3941
3942         /* make sure that the caller is allowed to set this security information */
3943         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
3944             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
3945                 if (error == EACCES)
3946                         error = EPERM;
3947                 return(error);
3948         }
3949
3950         error = vnode_setattr(vp, vap, ctx);
3951
3952         return (error);
3953 }
3954
3955
3956 /*
3957  * Change mode of a file given path name.
3958  *
3959  * Returns:     0                       Success
3960  *              namei:???               [anything namei can return]
3961  *              chmod2:???              [anything chmod2 can return]
3962  */
3963 static int
3964 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
3965 {
3966         struct nameidata nd;
3967         int error;
3968
3969         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3970                 UIO_USERSPACE, path, ctx);
3971         if ((error = namei(&nd)))
3972                 return (error);
3973         error = chmod2(ctx, nd.ni_vp, vap);
3974         vnode_put(nd.ni_vp);
3975         nameidone(&nd);
3976         return(error);
3977 }
3978
3979 /*
3980  * A chmod system call using an extended argument list compared to the regular
3981  * system call 'mkfifo'.
3982  *
3983  * Parameters:  p                       Process requesting the open
3984  *              uap                     User argument descriptor (see below)
3985  *              retval                  (ignored)
3986  *
3987  * Indirect:    uap->path               Path to object (same as 'chmod')
3988  *              uap->uid                UID to set
3989  *              uap->gid                GID to set
3990  *              uap->mode               File mode to set (same as 'chmod')
3991  *              uap->xsecurity          ACL to set (or delete)
3992  *
3993  * Returns:     0                       Success
3994  *              !0                      errno value
3995  *
3996  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
3997  *
3998  * XXX:         We should enummerate the possible errno values here, and where
3999  *              in the code they originated.
4000  */
4001 int
4002 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused register_t *retval)
4003 {
4004         int error;
4005         struct vnode_attr va;
4006         kauth_filesec_t xsecdst;
4007
4008         VATTR_INIT(&va);
4009         if (uap->mode != -1)
4010                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4011         if (uap->uid != KAUTH_UID_NONE)
4012                 VATTR_SET(&va, va_uid, uap->uid);
4013         if (uap->gid != KAUTH_GID_NONE)
4014                 VATTR_SET(&va, va_gid, uap->gid);
4015
4016         xsecdst = NULL;
4017         switch(uap->xsecurity) {
4018                 /* explicit remove request */
4019         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
4020                 VATTR_SET(&va, va_acl, NULL);
4021                 break;
4022                 /* not being set */
4023         case USER_ADDR_NULL:
4024                 break;
4025         default:
4026                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4027                         return(error);
4028                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4029                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4030         }
4031
4032         error = chmod1(vfs_context_current(), uap->path, &va);
4033
4034         if (xsecdst != NULL)
4035                 kauth_filesec_free(xsecdst);
4036         return(error);
4037 }
4038
4039 /*
4040  * Returns:     0                       Success
4041  *              chmod1:???              [anything chmod1 can return]
4042  */
4043 int
4044 chmod(__unused proc_t p, struct chmod_args *uap, __unused register_t *retval)
4045 {
4046         struct vnode_attr va;
4047
4048         VATTR_INIT(&va);
4049         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4050
4051         return(chmod1(vfs_context_current(), uap->path, &va));
4052 }
4053
4054 /*
4055  * Change mode of a file given a file descriptor.
4056  */
4057 static int
4058 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4059 {
4060         vnode_t vp;
4061         int error;
4062
4063         AUDIT_ARG(fd, fd);
4064
4065         if ((error = file_vnode(fd, &vp)) != 0)
4066                 return (error);
4067         if ((error = vnode_getwithref(vp)) != 0) {
4068                 file_drop(fd);
4069                 return(error);
4070         }
4071         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4072
4073         error = chmod2(vfs_context_current(), vp, vap);
4074         (void)vnode_put(vp);
4075         file_drop(fd);
4076
4077         return (error);
4078 }
4079
4080 int
4081 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t *retval)
4082 {
4083         int error;
4084         struct vnode_attr va;
4085         kauth_filesec_t xsecdst;
4086
4087         VATTR_INIT(&va);
4088         if (uap->mode != -1)
4089                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4090         if (uap->uid != KAUTH_UID_NONE)
4091                 VATTR_SET(&va, va_uid, uap->uid);
4092         if (uap->gid != KAUTH_GID_NONE)
4093                 VATTR_SET(&va, va_gid, uap->gid);
4094
4095         xsecdst = NULL;
4096         switch(uap->xsecurity) {
4097         case USER_ADDR_NULL:
4098                 VATTR_SET(&va, va_acl, NULL);
4099                 break;
4100         case CAST_USER_ADDR_T(-1):
4101                 break;
4102         default:
4103                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4104                         return(error);
4105                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4106         }
4107
4108         error = fchmod1(p, uap->fd, &va);
4109
4110
4111         switch(uap->xsecurity) {
4112         case USER_ADDR_NULL:
4113         case CAST_USER_ADDR_T(-1):
4114                 break;
4115         default:
4116                 if (xsecdst != NULL)
4117                         kauth_filesec_free(xsecdst);
4118         }
4119         return(error);
4120 }
4121
4122 int
4123 fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval)
4124 {
4125         struct vnode_attr va;
4126
4127         VATTR_INIT(&va);
4128         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4129
4130         return(fchmod1(p, uap->fd, &va));
4131 }
4132
4133
4134 /*
4135  * Set ownership given a path name.
4136  */
4137 /* ARGSUSED */
4138 static int
4139 chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, int follow)
4140 {
4141         vnode_t vp;
4142         struct vnode_attr va;
4143         int error;
4144         struct nameidata nd;
4145         kauth_action_t action;
4146
4147         AUDIT_ARG(owner, uap->uid, uap->gid);
4148
4149         NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4150                 UIO_USERSPACE, uap->path, ctx);
4151         error = namei(&nd);
4152         if (error)
4153                 return (error);
4154         vp = nd.ni_vp;
4155
4156         nameidone(&nd);
4157
4158         VATTR_INIT(&va);
4159         if (uap->uid != VNOVAL)
4160                 VATTR_SET(&va, va_uid, uap->uid);
4161         if (uap->gid != VNOVAL)
4162                 VATTR_SET(&va, va_gid, uap->gid);
4163
4164 #if CONFIG_MACF
4165         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4166         if (error)
4167                 goto out;
4168 #endif
4169
4170         /* preflight and authorize attribute changes */
4171         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4172                 goto out;
4173         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4174                 goto out;
4175         error = vnode_setattr(vp, &va, ctx);
4176
4177 out:
4178         /*
4179          * EACCES is only allowed from namei(); permissions failure should
4180          * return EPERM, so we need to translate the error code.
4181          */
4182         if (error == EACCES)
4183                 error = EPERM;
4184
4185         vnode_put(vp);
4186         return (error);
4187 }
4188
4189 int
4190 chown(__unused proc_t p, struct chown_args *uap, register_t *retval)
4191 {
4192         return chown1(vfs_context_current(), uap, retval, 1);
4193 }
4194
4195 int
4196 lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval)
4197 {
4198         /* Argument list identical, but machine generated; cast for chown1() */
4199         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
4200 }
4201
4202 /*
4203  * Set ownership given a file descriptor.
4204  */
4205 /* ARGSUSED */
4206 int
4207 fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval)
4208 {
4209         struct vnode_attr va;
4210         vfs_context_t ctx = vfs_context_current();
4211         vnode_t vp;
4212         int error;
4213         kauth_action_t action;
4214
4215         AUDIT_ARG(owner, uap->uid, uap->gid);
4216         AUDIT_ARG(fd, uap->fd);
4217
4218         if ( (error = file_vnode(uap->fd, &vp)) )
4219                 return (error);
4220
4221         if ( (error = vnode_getwithref(vp)) ) {
4222                 file_drop(uap->fd);
4223                 return(error);
4224         }
4225         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4226
4227         VATTR_INIT(&va);
4228         if (uap->uid != VNOVAL)
4229                 VATTR_SET(&va, va_uid, uap->uid);
4230         if (uap->gid != VNOVAL)
4231                 VATTR_SET(&va, va_gid, uap->gid);
4232
4233 #if NAMEDSTREAMS
4234         /* chown calls are not allowed for resource forks. */
4235         if (vp->v_flag & VISNAMEDSTREAM) {
4236                 error = EPERM;
4237                 goto out;
4238         }
4239 #endif
4240
4241 #if CONFIG_MACF
4242         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4243         if (error)
4244                 goto out;
4245 #endif
4246
4247         /* preflight and authorize attribute changes */
4248         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4249                 goto out;
4250         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4251                 if (error == EACCES)
4252                         error = EPERM;
4253                 goto out;
4254         }
4255         error = vnode_setattr(vp, &va, ctx);
4256
4257 out:
4258         (void)vnode_put(vp);
4259         file_drop(uap->fd);
4260         return (error);
4261 }
4262
4263 static int
4264 getutimes(user_addr_t usrtvp, struct timespec *tsp)
4265 {
4266         struct user_timeval tv[2];
4267         int error;
4268
4269         if (usrtvp == USER_ADDR_NULL) {
4270                 struct timeval old_tv;
4271                 /* XXX Y2038 bug because of microtime argument */
4272                 microtime(&old_tv);
4273                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
4274                 tsp[1] = tsp[0];
4275         } else {
4276                 if (IS_64BIT_PROCESS(current_proc())) {
4277                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
4278                 } else {
4279                         struct timeval old_tv[2];
4280                         error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv));
4281                         tv[0].tv_sec = old_tv[0].tv_sec;
4282                         tv[0].tv_usec = old_tv[0].tv_usec;
4283                         tv[1].tv_sec = old_tv[1].tv_sec;
4284                         tv[1].tv_usec = old_tv[1].tv_usec;
4285                 }
4286                 if (error)
4287                         return (error);
4288                 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4289                 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4290         }
4291         return 0;
4292 }
4293
4294 static int
4295 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
4296         int nullflag)
4297 {
4298         int error;
4299         struct vnode_attr va;
4300         kauth_action_t action;
4301
4302         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4303
4304         VATTR_INIT(&va);
4305         VATTR_SET(&va, va_access_time, ts[0]);
4306         VATTR_SET(&va, va_modify_time, ts[1]);
4307         if (nullflag)
4308                 va.va_vaflags |= VA_UTIMES_NULL;
4309
4310 #if NAMEDSTREAMS
4311         /* utimes calls are not allowed for resource forks. */
4312         if (vp->v_flag & VISNAMEDSTREAM) {
4313                 error = EPERM;
4314                 goto out;
4315         }
4316 #endif
4317
4318 #if CONFIG_MACF
4319         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
4320         if (error)
4321                 goto out;
4322 #endif
4323         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
4324                 if (!nullflag && error == EACCES)
4325                         error = EPERM;
4326                 goto out;
4327         }
4328
4329         /* since we may not need to auth anything, check here */
4330         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4331                 if (!nullflag && error == EACCES)
4332                         error = EPERM;
4333                 goto out;
4334         }
4335         error = vnode_setattr(vp, &va, ctx);
4336
4337 out:
4338         return error;
4339 }
4340
4341 /*
4342  * Set the access and modification times of a file.
4343  */
4344 /* ARGSUSED */
4345 int
4346 utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval)
4347 {
4348         struct timespec ts[2];
4349         user_addr_t usrtvp;
4350         int error;
4351         struct nameidata nd;
4352         vfs_context_t ctx = vfs_context_current();
4353
4354         /*
4355          * AUDIT: Needed to change the order of operations to do the
4356          * name lookup first because auditing wants the path.
4357          */
4358         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4359                 UIO_USERSPACE, uap->path, ctx);
4360         error = namei(&nd);
4361         if (error)
4362                 return (error);
4363         nameidone(&nd);
4364
4365         /*
4366          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
4367          * the current time instead.
4368          */
4369         usrtvp = uap->tptr;
4370         if ((error = getutimes(usrtvp, ts)) != 0)
4371                 goto out;
4372
4373         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
4374
4375 out:
4376         vnode_put(nd.ni_vp);
4377         return (error);
4378 }
4379
4380 /*
4381  * Set the access and modification times of a file.
4382  */
4383 /* ARGSUSED */
4384 int
4385 futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval)
4386 {
4387         struct timespec ts[2];
4388         vnode_t vp;
4389         user_addr_t usrtvp;
4390         int error;
4391
4392         AUDIT_ARG(fd, uap->fd);
4393         usrtvp = uap->tptr;
4394         if ((error = getutimes(usrtvp, ts)) != 0)
4395                 return (error);
4396         if ((error = file_vnode(uap->fd, &vp)) != 0)
4397                 return (error);
4398         if((error = vnode_getwithref(vp))) {
4399                 file_drop(uap->fd);
4400                 return(error);
4401         }
4402
4403         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
4404         vnode_put(vp);
4405         file_drop(uap->fd);
4406         return(error);
4407 }
4408
4409 /*
4410  * Truncate a file given its path name.
4411  */
4412 /* ARGSUSED */
4413 int
4414 truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retval)
4415 {
4416         vnode_t vp;
4417         struct vnode_attr va;
4418         vfs_context_t ctx = vfs_context_current();
4419         int error;
4420         struct nameidata nd;
4421         kauth_action_t action;
4422
4423         if (uap->length < 0)
4424                 return(EINVAL);
4425         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4426                 UIO_USERSPACE, uap->path, ctx);
4427         if ((error = namei(&nd)))
4428                 return (error);
4429         vp = nd.ni_vp;
4430
4431         nameidone(&nd);
4432
4433         VATTR_INIT(&va);
4434         VATTR_SET(&va, va_data_size, uap->length);
4435
4436 #if CONFIG_MACF
4437         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
4438         if (error)
4439                 goto out;
4440 #endif
4441
4442         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4443                 goto out;
4444         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4445                 goto out;
4446         error = vnode_setattr(vp, &va, ctx);
4447 out:
4448         vnode_put(vp);
4449         return (error);
4450 }
4451
4452 /*
4453  * Truncate a file given a file descriptor.
4454  */
4455 /* ARGSUSED */
4456 int
4457 ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval)
4458 {
4459         vfs_context_t ctx = vfs_context_current();
4460         struct vnode_attr va;
4461         vnode_t vp;
4462         struct fileproc *fp;
4463         int error ;
4464         int fd = uap->fd;
4465
4466         AUDIT_ARG(fd, uap->fd);
4467         if (uap->length < 0)
4468                 return(EINVAL);
4469
4470         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
4471                 return(error);
4472         }
4473
4474         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
4475                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
4476                 goto out;
4477         }
4478         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
4479                 error = EINVAL;
4480                 goto out;
4481         }
4482
4483         vp = (vnode_t)fp->f_fglob->fg_data;
4484
4485         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
4486                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
4487                 error = EINVAL;
4488                 goto out;
4489         }
4490
4491         if ((error = vnode_getwithref(vp)) != 0) {
4492                 goto out;
4493         }
4494
4495         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4496
4497 #if CONFIG_MACF
4498         error = mac_vnode_check_truncate(ctx,
4499             fp->f_fglob->fg_cred, vp);
4500         if (error) {
4501                 (void)vnode_put(vp);
4502                 goto out;
4503         }
4504 #endif
4505         VATTR_INIT(&va);
4506         VATTR_SET(&va, va_data_size, uap->length);
4507         error = vnode_setattr(vp, &va, ctx);
4508         (void)vnode_put(vp);
4509 out:
4510         file_drop(fd);
4511         return (error);
4512 }
4513
4514
4515 /*
4516  * Sync an open file.
4517  */
4518 /* ARGSUSED */
4519 int
4520 fsync(proc_t p, struct fsync_args *uap, register_t *retval)
4521 {
4522         __pthread_testcancel(1);
4523         return(fsync_nocancel(p, (struct fsync_nocancel_args *)uap, retval));
4524 }
4525
4526 int
4527 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *retval)
4528 {
4529         vnode_t vp;
4530         struct fileproc *fp;
4531         vfs_context_t ctx = vfs_context_current();
4532         int error;
4533
4534         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
4535                 return (error);
4536         if ( (error = vnode_getwithref(vp)) ) {
4537                 file_drop(uap->fd);
4538                 return(error);
4539         }
4540
4541         error = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4542
4543 #if NAMEDRSRCFORK
4544         /* Sync resource fork shadow file if necessary. */
4545         if ((error == 0) &&
4546             (vp->v_flag & VISNAMEDSTREAM) &&
4547             (vp->v_parent != NULLVP) &&
4548             !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) &&
4549             (fp->f_flags & FP_WRITTEN)) {
4550                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
4551         }
4552 #endif
4553
4554         (void)vnode_put(vp);
4555         file_drop(uap->fd);
4556         return (error);
4557 }
4558
4559 /*
4560  * Duplicate files.  Source must be a file, target must be a file or
4561  * must not exist.
4562  *
4563  * XXX Copyfile authorisation checking is woefully inadequate, and will not
4564  *     perform inheritance correctly.
4565  */
4566 /* ARGSUSED */
4567 int
4568 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retval)
4569 {
4570         vnode_t tvp, fvp, tdvp, sdvp;
4571         struct nameidata fromnd, tond;
4572         int error;
4573         vfs_context_t ctx = vfs_context_current();
4574
4575         /* Check that the flags are valid. */
4576
4577         if (uap->flags & ~CPF_MASK) {
4578                 return(EINVAL);
4579         }
4580
4581         NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
4582                 UIO_USERSPACE, uap->from, ctx);
4583         if ((error = namei(&fromnd)))
4584                 return (error);
4585         fvp = fromnd.ni_vp;
4586
4587         NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
4588             UIO_USERSPACE, uap->to, ctx);
4589         if ((error = namei(&tond))) {
4590                 goto out1;
4591         }
4592         tdvp = tond.ni_dvp;
4593         tvp = tond.ni_vp;
4594
4595         if (tvp != NULL) {
4596                 if (!(uap->flags & CPF_OVERWRITE)) {
4597                         error = EEXIST;
4598                         goto out;
4599                 }
4600         }
4601         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
4602                 error = EISDIR;
4603                 goto out;
4604         }
4605
4606         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4607                 goto out;
4608
4609         if (fvp == tdvp)
4610                 error = EINVAL;
4611         /*
4612          * If source is the same as the destination (that is the
4613          * same inode number) then there is nothing to do.
4614          * (fixed to have POSIX semantics - CSM 3/2/98)
4615          */
4616         if (fvp == tvp)
4617                 error = -1;
4618         if (!error)
4619                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
4620 out:
4621         sdvp = tond.ni_startdir;
4622         /*
4623          * nameidone has to happen before we vnode_put(tdvp)
4624          * since it may need to release the fs_nodelock on the tdvp
4625          */
4626         nameidone(&tond);
4627
4628         if (tvp)
4629                 vnode_put(tvp);
4630         vnode_put(tdvp);
4631         vnode_put(sdvp);
4632 out1:
4633         vnode_put(fvp);
4634
4635         if (fromnd.ni_startdir)
4636                 vnode_put(fromnd.ni_startdir);
4637         nameidone(&fromnd);
4638
4639         if (error == -1)
4640                 return (0);
4641         return (error);
4642 }
4643
4644
4645 /*
4646  * Rename files.  Source and destination must either both be directories,
4647  * or both not be directories.  If target is a directory, it must be empty.
4648  */
4649 /* ARGSUSED */
4650 int
4651 rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval)
4652 {
4653         vnode_t tvp, tdvp;
4654         vnode_t fvp, fdvp;
4655         struct nameidata fromnd, tond;
4656         vfs_context_t ctx = vfs_context_current();
4657         int error;
4658         int mntrename;
4659         int need_event;
4660         const char *oname;
4661         char *from_name = NULL, *to_name = NULL;
4662         int from_len, to_len;
4663         int holding_mntlock;
4664         mount_t locked_mp = NULL;
4665         vnode_t oparent;
4666         fse_info from_finfo, to_finfo;
4667
4668         holding_mntlock = 0;
4669 retry:
4670         fvp = tvp = NULL;
4671         fdvp = tdvp = NULL;
4672         mntrename = FALSE;
4673
4674         NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
4675
4676         if ( (error = namei(&fromnd)) )
4677                 goto out1;
4678         fdvp = fromnd.ni_dvp;
4679         fvp  = fromnd.ni_vp;
4680
4681 #if CONFIG_MACF
4682         error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
4683         if (error)
4684                 goto out1;
4685 #endif
4686
4687         NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
4688         if (fvp->v_type == VDIR)
4689                 tond.ni_cnd.cn_flags |= WILLBEDIR;
4690
4691         if ( (error = namei(&tond)) ) {
4692                 /*
4693                  * Translate error code for rename("dir1", "dir2/.").
4694                  */
4695                 if (error == EISDIR && fvp->v_type == VDIR)
4696                         error = EINVAL;
4697                 goto out1;
4698         }
4699         tdvp = tond.ni_dvp;
4700         tvp  = tond.ni_vp;
4701
4702 #if CONFIG_MACF
4703         error = mac_vnode_check_rename_to(ctx,
4704             tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
4705         if (error)
4706                 goto out1;
4707 #endif
4708
4709         if (tvp != NULL) {
4710                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
4711                         error = ENOTDIR;
4712                         goto out1;
4713                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
4714                         error = EISDIR;
4715                         goto out1;
4716                 }
4717         }
4718         if (fvp == tdvp) {
4719                 error = EINVAL;
4720                 goto out1;
4721         }
4722         /*
4723          * If the source and destination are the same (i.e. they're
4724          * links to the same vnode) and the target file system is
4725          * case sensitive, then there is nothing to do.
4726          */
4727         if (fvp == tvp) {
4728                 int pathconf_val;
4729
4730                 /*
4731                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
4732                  * then assume that this file system is case sensitive.
4733                  */
4734                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
4735                     pathconf_val != 0) {
4736                         goto out1;
4737                 }
4738         }
4739
4740         /*
4741          * Authorization.
4742          *
4743          * If tvp is a directory and not the same as fdvp, or tdvp is not
4744          * the same as fdvp, the node is moving between directories and we
4745          * need rights to remove from the old and add to the new.
4746          *
4747          * If tvp already exists and is not a directory, we need to be
4748          * allowed to delete it.
4749          *
4750          * Note that we do not inherit when renaming.
4751          *
4752          * XXX This needs to be revisited to implement the deferred-inherit bit
4753          */
4754         {
4755                 int moving = 0;
4756
4757                 error = 0;
4758                 if ((tvp != NULL) && vnode_isdir(tvp)) {
4759                         if (tvp != fdvp)
4760                                 moving = 1;
4761                 } else if (tdvp != fdvp) {
4762                         moving = 1;
4763                 }
4764                 /*
4765                  * must have delete rights to remove the old name even in
4766                  * the simple case of fdvp == tdvp.
4767                  *
4768                  * If fvp is a directory, and we are changing it's parent,
4769                  * then we also need rights to rewrite its ".." entry as well.
4770                  */
4771                 if (vnode_isdir(fvp)) {
4772                         if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
4773                                 goto auth_exit;
4774                 } else {
4775                 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
4776                         goto auth_exit;
4777                 }
4778                 if (moving) {
4779                         /* moving into tdvp or tvp, must have rights to add */
4780                         if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
4781                                  NULL,
4782                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
4783                                  ctx)) != 0)
4784                                 goto auth_exit;
4785                 } else {
4786                         /* node staying in same directory, must be allowed to add new name */
4787                         if ((error = vnode_authorize(fdvp, NULL,
4788                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4789                                 goto auth_exit;
4790                 }
4791                 /* overwriting tvp */
4792                 if ((tvp != NULL) && !vnode_isdir(tvp) &&
4793                     ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0))
4794                         goto auth_exit;
4795
4796                 /* XXX more checks? */
4797
4798 auth_exit:
4799                 /* authorization denied */
4800                 if (error != 0)
4801                         goto out1;
4802         }
4803         /*
4804          * Allow the renaming of mount points.
4805          * - target must not exist
4806          * - target must reside in the same directory as source
4807          * - union mounts cannot be renamed
4808          * - "/" cannot be renamed
4809          */
4810         if ((fvp->v_flag & VROOT) &&
4811             (fvp->v_type == VDIR) &&
4812             (tvp == NULL)  &&
4813             (fvp->v_mountedhere == NULL)  &&
4814             (fdvp == tdvp)  &&
4815             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
4816             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
4817                 vnode_t coveredvp;
4818
4819                 /* switch fvp to the covered vnode */
4820                 coveredvp = fvp->v_mount->mnt_vnodecovered;
4821                 if ( (vnode_getwithref(coveredvp)) ) {
4822                         error = ENOENT;
4823                         goto out1;
4824                 }
4825                 vnode_put(fvp);
4826
4827                 fvp = coveredvp;
4828                 mntrename = TRUE;
4829         }
4830         /*
4831          * Check for cross-device rename.
4832          */
4833         if ((fvp->v_mount != tdvp->v_mount) ||
4834             (tvp && (fvp->v_mount != tvp->v_mount))) {
4835                 error = EXDEV;
4836                 goto out1;
4837         }
4838         /*
4839          * Avoid renaming "." and "..".
4840          */
4841         if (fvp->v_type == VDIR &&
4842             ((fdvp == fvp) ||
4843              (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
4844              ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
4845                 error = EINVAL;
4846                 goto out1;
4847         }
4848         /*
4849          * The following edge case is caught here:
4850          * (to cannot be a descendent of from)
4851          *
4852          *       o fdvp
4853          *      /
4854          *     /
4855          *    o fvp
4856          *     \
4857          *      \
4858          *       o tdvp
4859          *      /
4860          *     /
4861          *    o tvp
4862          */
4863         if (tdvp->v_parent == fvp) {
4864                 error = EINVAL;
4865                 goto out1;
4866         }
4867
4868         /*
4869          * If source is the same as the destination (that is the
4870          * same inode number) then there is nothing to do...
4871          * EXCEPT if the underlying file system supports case
4872          * insensitivity and is case preserving.  In this case
4873          * the file system needs to handle the special case of
4874          * getting the same vnode as target (fvp) and source (tvp).
4875          *
4876          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
4877          * and _PC_CASE_PRESERVING can have this exception, and they need to
4878          * handle the special case of getting the same vnode as target and
4879          * source.  NOTE: Then the target is unlocked going into vnop_rename,
4880          * so not to cause locking problems. There is a single reference on tvp.
4881          *
4882          * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE
4883          * that correct behaviour then is just to remove the source (link)
4884          */
4885         if (fvp == tvp && fdvp == tdvp) {
4886                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
4887                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
4888                           fromnd.ni_cnd.cn_namelen)) {
4889                         goto out1;
4890                 }
4891         }
4892
4893         if (holding_mntlock && fvp->v_mount != locked_mp) {
4894                 /*
4895                  * we're holding a reference and lock
4896                  * on locked_mp, but it no longer matches
4897                  * what we want to do... so drop our hold
4898                  */
4899                 mount_unlock_renames(locked_mp);
4900                 mount_drop(locked_mp, 0);
4901                 holding_mntlock = 0;
4902         }
4903         if (tdvp != fdvp && fvp->v_type == VDIR) {
4904                 /*
4905                  * serialize renames that re-shape
4906                  * the tree... if holding_mntlock is
4907                  * set, then we're ready to go...
4908                  * otherwise we
4909                  * first need to drop the iocounts
4910                  * we picked up, second take the
4911                  * lock to serialize the access,
4912                  * then finally start the lookup
4913                  * process over with the lock held
4914                  */
4915                 if (!holding_mntlock) {
4916                         /*
4917                          * need to grab a reference on
4918                          * the mount point before we
4919                          * drop all the iocounts... once
4920                          * the iocounts are gone, the mount
4921                          * could follow
4922                          */
4923                         locked_mp = fvp->v_mount;
4924                         mount_ref(locked_mp, 0);
4925
4926                         /*
4927                          * nameidone has to happen before we vnode_put(tvp)
4928                          * since it may need to release the fs_nodelock on the tvp
4929                          */
4930                         nameidone(&tond);
4931
4932                         if (tvp)
4933                                 vnode_put(tvp);
4934                         vnode_put(tdvp);
4935
4936                         /*
4937                          * nameidone has to happen before we vnode_put(fdvp)
4938                          * since it may need to release the fs_nodelock on the fvp
4939                          */
4940                         nameidone(&fromnd);
4941
4942                         vnode_put(fvp);
4943                         vnode_put(fdvp);
4944
4945                         mount_lock_renames(locked_mp);
4946                         holding_mntlock = 1;
4947
4948                         goto retry;
4949                 }
4950         } else {
4951                 /*
4952                  * when we dropped the iocounts to take
4953                  * the lock, we allowed the identity of
4954                  * the various vnodes to change... if they did,
4955                  * we may no longer be dealing with a rename
4956                  * that reshapes the tree... once we're holding
4957                  * the iocounts, the vnodes can't change type
4958                  * so we're free to drop the lock at this point
4959                  * and continue on
4960                  */
4961                 if (holding_mntlock) {
4962                         mount_unlock_renames(locked_mp);
4963                         mount_drop(locked_mp, 0);
4964                         holding_mntlock = 0;
4965                 }
4966         }
4967         // save these off so we can later verify that fvp is the same
4968         oname   = fvp->v_name;
4969         oparent = fvp->v_parent;
4970
4971 #if CONFIG_FSE
4972         need_event = need_fsevent(FSE_RENAME, fvp);
4973         if (need_event) {
4974                 get_fse_info(fvp, &from_finfo, ctx);
4975
4976                 if (tvp) {
4977                         get_fse_info(tvp, &to_finfo, ctx);
4978                 }
4979         }
4980 #else
4981         need_event = 0;
4982 #endif /* CONFIG_FSE */
4983
4984         if (need_event || kauth_authorize_fileop_has_listeners()) {
4985                 GET_PATH(from_name);
4986                 if (from_name == NULL) {
4987                         error = ENOMEM;
4988                         goto out1;
4989                 }
4990                 from_len = MAXPATHLEN;
4991                 vn_getpath(fdvp, from_name, &from_len);
4992                 if ((from_len + 1 + fromnd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4993                     if (from_len > 2) {
4994                         from_name[from_len-1] = '/';
4995                     } else {
4996                         from_len--;
4997                     }
4998                     strlcpy(&from_name[from_len], fromnd.ni_cnd.cn_nameptr, MAXPATHLEN-from_len);
4999                     from_len += fromnd.ni_cnd.cn_namelen + 1;
5000                     from_name[from_len] = '\0';
5001                 }
5002
5003                 GET_PATH(to_name);
5004                 if (to_name == NULL) {
5005                         error = ENOMEM;
5006                         goto out1;
5007                 }
5008
5009                 to_len = MAXPATHLEN;
5010                 vn_getpath(tdvp, to_name, &to_len);
5011                 // if the path is not just "/", then append a "/"
5012                 if ((to_len + 1 + tond.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5013                     if (to_len > 2) {
5014                         to_name[to_len-1] = '/';
5015                     } else {
5016                         to_len--;
5017                     }
5018                     strlcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr, MAXPATHLEN-to_len);
5019                     to_len += tond.ni_cnd.cn_namelen + 1;
5020                     to_name[to_len] = '\0';
5021                 }
5022         }
5023
5024         error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5025                             tdvp, tvp, &tond.ni_cnd,
5026                             ctx);
5027
5028         if (holding_mntlock) {
5029                 /*
5030                  * we can drop our serialization
5031                  * lock now
5032                  */
5033                 mount_unlock_renames(locked_mp);
5034                 mount_drop(locked_mp, 0);
5035                 holding_mntlock = 0;
5036         }
5037         if (error) {
5038
5039                 goto out1;
5040         }
5041
5042         /* call out to allow 3rd party notification of rename.
5043          * Ignore result of kauth_authorize_fileop call.
5044          */
5045         kauth_authorize_fileop(vfs_context_ucred(ctx),
5046                         KAUTH_FILEOP_RENAME,
5047                         (uintptr_t)from_name, (uintptr_t)to_name);
5048
5049 #if CONFIG_FSE
5050         if (from_name != NULL && to_name != NULL) {
5051                 if (tvp) {
5052                         add_fsevent(FSE_RENAME, ctx,
5053                                     FSE_ARG_STRING, from_len, from_name,
5054                                     FSE_ARG_FINFO, &from_finfo,
5055                                     FSE_ARG_STRING, to_len, to_name,
5056                                     FSE_ARG_FINFO, &to_finfo,
5057                                     FSE_ARG_DONE);
5058                 } else {
5059                         add_fsevent(FSE_RENAME, ctx,
5060                                     FSE_ARG_STRING, from_len, from_name,
5061                                     FSE_ARG_FINFO, &from_finfo,
5062                                     FSE_ARG_STRING, to_len, to_name,
5063                                     FSE_ARG_DONE);
5064                 }
5065         }
5066 #endif /* CONFIG_FSE */
5067
5068         /*
5069          * update filesystem's mount point data
5070          */
5071         if (mntrename) {
5072                 char *cp, *pathend, *mpname;
5073                 char * tobuf;
5074                 struct mount *mp;
5075                 int maxlen;
5076                 size_t len = 0;
5077
5078                 mp = fvp->v_mountedhere;
5079
5080                 if (vfs_busy(mp, LK_NOWAIT)) {
5081                         error = EBUSY;
5082                         goto out1;
5083                 }
5084                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5085
5086                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5087                 if (!error) {
5088                         /* find current mount point prefix */
5089                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
5090                         for (cp = pathend; *cp != '\0'; ++cp) {
5091                                 if (*cp == '/')
5092                                         pathend = cp + 1;
5093                         }
5094                         /* find last component of target name */
5095                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
5096                                 if (*cp == '/')
5097                                         mpname = cp + 1;
5098                         }
5099                         /* append name to prefix */
5100                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
5101                         bzero(pathend, maxlen);
5102                         strlcpy(pathend, mpname, maxlen);
5103                 }
5104                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
5105
5106                 vfs_unbusy(mp);
5107         }
5108         /*
5109          * fix up name & parent pointers.  note that we first
5110          * check that fvp has the same name/parent pointers it
5111          * had before the rename call... this is a 'weak' check
5112          * at best...
5113          */
5114         if (oname == fvp->v_name && oparent == fvp->v_parent) {
5115                 int update_flags;
5116
5117                 update_flags = VNODE_UPDATE_NAME;
5118
5119                 if (fdvp != tdvp)
5120                         update_flags |= VNODE_UPDATE_PARENT;
5121
5122                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
5123         }
5124 out1:
5125         if (to_name != NULL)
5126                 RELEASE_PATH(to_name);
5127         if (from_name != NULL)
5128                 RELEASE_PATH(from_name);
5129
5130         if (holding_mntlock) {
5131                 mount_unlock_renames(locked_mp);
5132                 mount_drop(locked_mp, 0);
5133         }
5134         if (tdvp) {
5135                 /*
5136                  * nameidone has to happen before we vnode_put(tdvp)
5137                  * since it may need to release the fs_nodelock on the tdvp
5138                  */
5139                 nameidone(&tond);
5140
5141                 if (tvp)
5142                         vnode_put(tvp);
5143                 vnode_put(tdvp);
5144         }
5145         if (fdvp) {
5146                 /*
5147                  * nameidone has to happen before we vnode_put(fdvp)
5148                  * since it may need to release the fs_nodelock on the fdvp
5149                  */
5150                 nameidone(&fromnd);
5151
5152                 if (fvp)
5153                         vnode_put(fvp);
5154                 vnode_put(fdvp);
5155         }
5156         return (error);
5157 }
5158
5159 /*
5160  * Make a directory file.
5161  *
5162  * Returns:     0                       Success
5163  *              EEXIST
5164  *      namei:???
5165  *      vnode_authorize:???
5166  *      vn_create:???
5167  */
5168 /* ARGSUSED */
5169 static int
5170 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5171 {
5172         vnode_t vp, dvp;
5173         int error;
5174         int update_flags = 0;
5175         struct nameidata nd;
5176
5177         AUDIT_ARG(mode, vap->va_mode);
5178         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
5179                 UIO_USERSPACE, path, ctx);
5180         nd.ni_cnd.cn_flags |= WILLBEDIR;
5181         error = namei(&nd);
5182         if (error)
5183                 return (error);
5184         dvp = nd.ni_dvp;
5185         vp = nd.ni_vp;
5186
5187         if (vp != NULL) {
5188                 error = EEXIST;
5189                 goto out;
5190         }
5191
5192         VATTR_SET(vap, va_type, VDIR);
5193
5194 #if CONFIG_MACF
5195         error = mac_vnode_check_create(ctx,
5196             nd.ni_dvp, &nd.ni_cnd, vap);
5197         if (error)
5198                 goto out;
5199 #endif
5200
5201         /* authorize addition of a directory to the parent */
5202         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5203                 goto out;
5204
5205
5206         /* make the directory */
5207         if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
5208                 goto out;
5209
5210         // Make sure the name & parent pointers are hooked up
5211         if (vp->v_name == NULL)
5212                 update_flags |= VNODE_UPDATE_NAME;
5213         if (vp->v_parent == NULLVP)
5214                 update_flags |= VNODE_UPDATE_PARENT;
5215
5216         if (update_flags)
5217                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5218
5219 #if CONFIG_FSE
5220         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
5221 #endif
5222
5223 out:
5224         /*
5225          * nameidone has to happen before we vnode_put(dvp)
5226          * since it may need to release the fs_nodelock on the dvp
5227          */
5228         nameidone(&nd);
5229
5230         if (vp)
5231                 vnode_put(vp);
5232         vnode_put(dvp);
5233
5234         return (error);
5235 }
5236
5237
5238 int
5239 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *retval)
5240 {
5241         int ciferror;
5242         kauth_filesec_t xsecdst;
5243         struct vnode_attr va;
5244
5245         xsecdst = NULL;
5246         if ((uap->xsecurity != USER_ADDR_NULL) &&
5247             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
5248                 return ciferror;
5249
5250         VATTR_INIT(&va);
5251         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5252         if (xsecdst != NULL)
5253                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5254
5255         ciferror = mkdir1(vfs_context_current(), uap->path, &va);
5256         if (xsecdst != NULL)
5257                 kauth_filesec_free(xsecdst);
5258         return ciferror;
5259 }
5260
5261 int
5262 mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval)
5263 {
5264         struct vnode_attr va;
5265
5266         VATTR_INIT(&va);
5267         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5268
5269         return(mkdir1(vfs_context_current(), uap->path, &va));
5270 }
5271
5272 /*
5273  * Remove a directory file.
5274  */
5275 /* ARGSUSED */
5276 int
5277 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval)
5278 {
5279         vnode_t vp, dvp;
5280         int error;
5281         struct nameidata nd;
5282         vfs_context_t ctx = vfs_context_current();
5283
5284         int restart_flag, oldvp_id = -1;
5285
5286         /*
5287          * This loop exists to restart rmdir in the unlikely case that two
5288          * processes are simultaneously trying to remove the same directory
5289          * containing orphaned appleDouble files.
5290          */
5291         do {
5292                 restart_flag = 0;
5293
5294                 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
5295                                 UIO_USERSPACE, uap->path, ctx);
5296                 error = namei(&nd);
5297                 if (error)
5298                         return (error);
5299
5300                 dvp = nd.ni_dvp;
5301                 vp = nd.ni_vp;
5302
5303
5304                 /*
5305                  * If being restarted check if the new vp
5306                  * still has the same v_id.
5307                  */
5308                 if (oldvp_id != -1 && oldvp_id != vp->v_id) {
5309                         error = ENOENT;
5310                         goto out;
5311                 }
5312
5313                 if (vp->v_type != VDIR) {
5314                         /*
5315                          * rmdir only deals with directories
5316                          */
5317                         error = ENOTDIR;
5318                 } else if (dvp == vp) {
5319                         /*
5320                          * No rmdir "." please.
5321                          */
5322                         error = EINVAL;
5323                 } else if (vp->v_flag & VROOT) {
5324                         /*
5325                          * The root of a mounted filesystem cannot be deleted.
5326                          */
5327                         error = EBUSY;
5328                 } else {
5329 #if CONFIG_MACF
5330                         error = mac_vnode_check_unlink(ctx, dvp,
5331                                         vp, &nd.ni_cnd);
5332                         if (!error)
5333 #endif
5334                                 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
5335                 }
5336                 if (!error) {
5337                         char     *path = NULL;
5338                         int       len;
5339                         fse_info  finfo;
5340                         int has_listeners = 0;
5341                         int need_event = 0;
5342
5343 #if CONFIG_FSE
5344                         need_event = need_fsevent(FSE_DELETE, dvp);
5345                         if (need_event) {
5346                                 get_fse_info(vp, &finfo, ctx);
5347                         }
5348 #endif
5349                         has_listeners = kauth_authorize_fileop_has_listeners();
5350                         if (need_event || has_listeners) {
5351                                 GET_PATH(path);
5352                                 if (path == NULL) {
5353                                         error = ENOMEM;
5354                                         goto out;
5355                                 }
5356                                 len = MAXPATHLEN;
5357                                 vn_getpath(vp, path, &len);
5358                         }
5359
5360                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5361
5362                         /*
5363                          * Special case to remove orphaned AppleDouble
5364                          * files. I don't like putting this in the kernel,
5365                          * but carbon does not like putting this in carbon either,
5366                          * so here we are.
5367                          */
5368                         if (error == ENOTEMPTY) {
5369                                 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
5370                                 if (error == EBUSY) {
5371                                         oldvp_id = vp->v_id;
5372                                         goto out;
5373                                 }
5374
5375
5376                                 /*
5377                                  * Assuming everything went well, we will try the RMDIR again
5378                                  */
5379                                 if (!error)
5380                                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5381                         }
5382
5383                         /*
5384                          * Call out to allow 3rd party notification of delete.
5385                          * Ignore result of kauth_authorize_fileop call.
5386                          */
5387                         if (!error) {
5388                                 if (has_listeners) {
5389                                         kauth_authorize_fileop(vfs_context_ucred(ctx),
5390                                                         KAUTH_FILEOP_DELETE,
5391                                                         (uintptr_t)vp,
5392                                                         (uintptr_t)path);
5393                                 }
5394
5395                                 if (vp->v_flag & VISHARDLINK) {
5396                                     // see the comment in unlink1() about why we update
5397                                     // the parent of a hard link when it is removed
5398                                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5399                                 }
5400
5401 #if CONFIG_FSE
5402                                 if (need_event) {
5403                                         add_fsevent(FSE_DELETE, ctx,
5404                                                         FSE_ARG_STRING, len, path,
5405                                                         FSE_ARG_FINFO, &finfo,
5406                                                         FSE_ARG_DONE);
5407                                 }
5408 #endif
5409                         }
5410                         if (path != NULL)
5411                                 RELEASE_PATH(path);
5412                 }
5413
5414 out:
5415                 /*
5416                  * nameidone has to happen before we vnode_put(dvp)
5417                  * since it may need to release the fs_nodelock on the dvp
5418                  */
5419                 nameidone(&nd);
5420
5421                 vnode_put(dvp);
5422                 vnode_put(vp);
5423
5424                 if (restart_flag == 0) {
5425                         wakeup_one((caddr_t)vp);
5426                         return (error);
5427                 }
5428                 tsleep(vp, PVFS, "rm AD", 1);
5429
5430         } while (restart_flag != 0);
5431
5432         return (error);
5433
5434 }
5435
5436 /* Get direntry length padded to 8 byte alignment */
5437 #define DIRENT64_LEN(namlen) \
5438         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
5439
5440 static errno_t
5441 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
5442                 int *numdirent, vfs_context_t ctxp)
5443 {
5444         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
5445         if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
5446                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
5447         } else {
5448                 size_t bufsize;
5449                 void * bufptr;
5450                 uio_t auio;
5451                 struct direntry entry64;
5452                 struct dirent *dep;
5453                 int bytesread;
5454                 int error;
5455
5456                 /*
5457                  * Our kernel buffer needs to be smaller since re-packing
5458                  * will expand each dirent.  The worse case (when the name
5459                  * length is 3) corresponds to a struct direntry size of 32
5460                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
5461                  * (4-byte aligned).  So having a buffer that is 3/8 the size
5462                  * will prevent us from reading more than we can pack.
5463                  *
5464                  * Since this buffer is wired memory, we will limit the
5465                  * buffer size to a maximum of 32K. We would really like to
5466                  * use 32K in the MIN(), but we use magic number 87371 to
5467                  * prevent uio_resid() * 3 / 8 from overflowing.
5468                  */
5469                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
5470                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
5471
5472                 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
5473                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
5474                 auio->uio_offset = uio->uio_offset;
5475
5476                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
5477
5478                 dep = (struct dirent *)bufptr;
5479                 bytesread = bufsize - uio_resid(auio);
5480
5481                 /*
5482                  * Convert all the entries and copy them out to user's buffer.
5483                  */
5484                 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
5485                         /* Convert a dirent to a dirent64. */
5486                         entry64.d_ino = dep->d_ino;
5487                         entry64.d_seekoff = 0;
5488                         entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
5489                         entry64.d_namlen = dep->d_namlen;
5490                         entry64.d_type = dep->d_type;
5491                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
5492
5493                         /* Move to next entry. */
5494                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
5495
5496                         /* Copy entry64 to user's buffer. */
5497                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
5498                 }
5499
5500                 /* Update the real offset using the offset we got from VNOP_READDIR. */
5501                 if (error == 0) {
5502                         uio->uio_offset = auio->uio_offset;
5503                 }
5504                 uio_free(auio);
5505                 FREE(bufptr, M_TEMP);
5506                 return (error);
5507         }
5508 }
5509
5510 /*
5511  * Read a block of directory entries in a file system independent format.
5512  */
5513 static int
5514 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
5515                      off_t *offset, int flags)
5516 {
5517         vnode_t vp;
5518         struct vfs_context context = *vfs_context_current();    /* local copy */
5519         struct fileproc *fp;
5520         uio_t auio;
5521         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5522         off_t loff;
5523         int error, eofflag, numdirent;
5524         char uio_buf[ UIO_SIZEOF(1) ];
5525
5526         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
5527         if (error) {
5528                 return (error);
5529         }
5530         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5531                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5532                 error = EBADF;
5533                 goto out;
5534         }
5535
5536 #if CONFIG_MACF
5537         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
5538         if (error)
5539                 goto out;
5540 #endif
5541         if ( (error = vnode_getwithref(vp)) ) {
5542                 goto out;
5543         }
5544         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5545
5546 unionread:
5547         if (vp->v_type != VDIR) {
5548                 (void)vnode_put(vp);
5549                 error = EINVAL;
5550                 goto out;
5551         }
5552
5553 #if CONFIG_MACF
5554         error = mac_vnode_check_readdir(&context, vp);
5555         if (error != 0) {
5556                 (void)vnode_put(vp);
5557                 goto out;
5558         }
5559 #endif /* MAC */
5560
5561         loff = fp->f_fglob->fg_offset;
5562         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
5563         uio_addiov(auio, bufp, bufsize);
5564
5565         if (flags & VNODE_READDIR_EXTENDED) {
5566                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
5567                 fp->f_fglob->fg_offset = uio_offset(auio);
5568         } else {
5569                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
5570                 fp->f_fglob->fg_offset = uio_offset(auio);
5571         }
5572         if (error) {
5573                 (void)vnode_put(vp);
5574                 goto out;
5575         }
5576
5577         if ((user_ssize_t)bufsize == uio_resid(auio)){
5578                 if (union_dircheckp) {
5579                         error = union_dircheckp(&vp, fp, &context);
5580                         if (error == -1)
5581                                 goto unionread;
5582                         if (error)
5583                                 goto out;
5584                 }
5585
5586                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
5587                         struct vnode *tvp = vp;
5588                         vp = vp->v_mount->mnt_vnodecovered;
5589                         vnode_getwithref(vp);
5590                         vnode_ref(vp);
5591                         fp->f_fglob->fg_data = (caddr_t) vp;
5592                         fp->f_fglob->fg_offset = 0;
5593                         vnode_rele(tvp);
5594                         vnode_put(tvp);
5595                         goto unionread;
5596                 }
5597         }
5598
5599         vnode_put(vp);
5600         if (offset) {
5601                 *offset = loff;
5602         }
5603         // LP64todo - fix this
5604         *bytesread = bufsize - uio_resid(auio);
5605 out:
5606         file_drop(fd);
5607         return (error);
5608 }
5609
5610
5611 int
5612 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_t *retval)
5613 {
5614         off_t offset;
5615         long loff;
5616         ssize_t bytesread;
5617         int error;
5618
5619         AUDIT_ARG(fd, uap->fd);
5620         error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
5621
5622         if (error == 0) {
5623                 loff = (long)offset;
5624                 error = copyout((caddr_t)&loff, uap->basep, sizeof(long));
5625                 *retval = bytesread;
5626         }
5627         return (error);
5628 }
5629
5630 int
5631 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
5632 {
5633         off_t offset;
5634         ssize_t bytesread;
5635         int error;
5636
5637         AUDIT_ARG(fd, uap->fd);
5638         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
5639
5640         if (error == 0) {
5641                 *retval = bytesread;
5642                 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
5643         }
5644         return (error);
5645 }
5646
5647
5648 /*
5649  * Set the mode mask for creation of filesystem nodes.
5650  */
5651 #warning XXX implement xsecurity
5652
5653 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
5654 static int
5655 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval)
5656 {
5657         struct filedesc *fdp;
5658
5659         AUDIT_ARG(mask, newmask);
5660         proc_fdlock(p);
5661         fdp = p->p_fd;
5662         *retval = fdp->fd_cmask;
5663         fdp->fd_cmask = newmask & ALLPERMS;
5664         proc_fdunlock(p);
5665         return (0);
5666 }
5667
5668
5669 int
5670 umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval)
5671 {
5672         int ciferror;
5673         kauth_filesec_t xsecdst;
5674
5675         xsecdst = KAUTH_FILESEC_NONE;
5676         if (uap->xsecurity != USER_ADDR_NULL) {
5677                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5678                         return ciferror;
5679         } else {
5680                 xsecdst = KAUTH_FILESEC_NONE;
5681         }
5682
5683         ciferror = umask1(p, uap->newmask, xsecdst, retval);
5684
5685         if (xsecdst != KAUTH_FILESEC_NONE)
5686                 kauth_filesec_free(xsecdst);
5687         return ciferror;
5688 }
5689
5690 int
5691 umask(proc_t p, struct umask_args *uap, register_t *retval)
5692 {
5693         return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
5694 }
5695
5696 /*
5697  * Void all references to file by ripping underlying filesystem
5698  * away from vnode.
5699  */
5700 /* ARGSUSED */
5701 int
5702 revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval)
5703 {
5704         vnode_t vp;
5705         struct vnode_attr va;
5706         vfs_context_t ctx = vfs_context_current();
5707         int error;
5708         struct nameidata nd;
5709
5710         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5711                 UIO_USERSPACE, uap->path, ctx);
5712         error = namei(&nd);
5713         if (error)
5714                 return (error);
5715         vp = nd.ni_vp;
5716
5717         nameidone(&nd);
5718
5719 #if CONFIG_MACF
5720         error = mac_vnode_check_revoke(ctx, vp);
5721         if (error)
5722                 goto out;
5723 #endif
5724
5725         VATTR_INIT(&va);
5726         VATTR_WANTED(&va, va_uid);
5727         if ((error = vnode_getattr(vp, &va, ctx)))
5728                 goto out;
5729         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
5730             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
5731                 goto out;
5732         if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
5733                 VNOP_REVOKE(vp, REVOKEALL, ctx);
5734 out:
5735         vnode_put(vp);
5736         return (error);
5737 }
5738
5739
5740 /*
5741  *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
5742  *  The following system calls are designed to support features
5743  *  which are specific to the HFS & HFS Plus volume formats
5744  */
5745
5746 #ifdef __APPLE_API_OBSOLETE
5747
5748 /************************************************/
5749 /* *** Following calls will be deleted soon *** */
5750 /************************************************/
5751
5752 /*
5753  * Make a complex file.  A complex file is one with multiple forks (data streams)
5754  */
5755 /* ARGSUSED */
5756 int
5757 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused register_t *retval)
5758 {
5759         return (ENOTSUP);
5760 }
5761
5762 /*
5763  * Extended stat call which returns volumeid and vnodeid as well as other info
5764  */
5765 /* ARGSUSED */
5766 int
5767 statv(__unused proc_t p,
5768           __unused struct statv_args *uap,
5769           __unused register_t *retval)
5770 {
5771         return (ENOTSUP);       /*  We'll just return an error for now */
5772
5773 } /* end of statv system call */
5774
5775 /*
5776 * Extended lstat call which returns volumeid and vnodeid as well as other info
5777 */
5778 /* ARGSUSED */
5779 int
5780 lstatv(__unused proc_t p,
5781            __unused struct lstatv_args *uap,
5782            __unused register_t *retval)
5783 {
5784        return (ENOTSUP);        /*  We'll just return an error for now */
5785 } /* end of lstatv system call */
5786
5787 /*
5788 * Extended fstat call which returns volumeid and vnodeid as well as other info
5789 */
5790 /* ARGSUSED */
5791 int
5792 fstatv(__unused proc_t p,
5793            __unused struct fstatv_args *uap,
5794            __unused register_t *retval)
5795 {
5796        return (ENOTSUP);        /*  We'll just return an error for now */
5797 } /* end of fstatv system call */
5798
5799
5800 /************************************************/
5801 /* *** Preceding calls will be deleted soon *** */
5802 /************************************************/
5803
5804 #endif /* __APPLE_API_OBSOLETE */
5805
5806 /*
5807 * Obtain attribute information on objects in a directory while enumerating
5808 * the directory.  This call does not yet support union mounted directories.
5809 * TO DO
5810 *  1.union mounted directories.
5811 */
5812
5813 /* ARGSUSED */
5814 int
5815 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *retval)
5816 {
5817         vnode_t vp;
5818         struct fileproc *fp;
5819         uio_t auio = NULL;
5820         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5821         uint32_t count;
5822         uint32_t newstate;
5823         int error, eofflag;
5824         uint32_t loff;
5825         struct attrlist attributelist;
5826         vfs_context_t ctx = vfs_context_current();
5827         int fd = uap->fd;
5828         char uio_buf[ UIO_SIZEOF(1) ];
5829         kauth_action_t action;
5830
5831         AUDIT_ARG(fd, fd);
5832
5833         /* Get the attributes into kernel space */
5834         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
5835                 return(error);
5836         }
5837         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
5838                 return(error);
5839         }
5840         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5841                 return (error);
5842         }
5843         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5844                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5845                 error = EBADF;
5846                 goto out;
5847         }
5848
5849
5850 #if CONFIG_MACF
5851         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5852             fp->f_fglob);
5853         if (error)
5854                 goto out;
5855 #endif
5856
5857
5858         if ( (error = vnode_getwithref(vp)) )
5859                 goto out;
5860
5861         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5862
5863         if (vp->v_type != VDIR) {
5864                 (void)vnode_put(vp);
5865                 error = EINVAL;
5866                 goto out;
5867         }
5868
5869 #if CONFIG_MACF
5870         error = mac_vnode_check_readdir(ctx, vp);
5871         if (error != 0) {
5872                 (void)vnode_put(vp);
5873                 goto out;
5874         }
5875 #endif /* MAC */
5876
5877         /* set up the uio structure which will contain the users return buffer */
5878         loff = fp->f_fglob->fg_offset;
5879         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
5880             &uio_buf[0], sizeof(uio_buf));
5881         uio_addiov(auio, uap->buffer, uap->buffersize);
5882
5883         /*
5884          * If the only item requested is file names, we can let that past with
5885          * just LIST_DIRECTORY.  If they want any other attributes, that means
5886          * they need SEARCH as well.
5887          */
5888         action = KAUTH_VNODE_LIST_DIRECTORY;
5889         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
5890             attributelist.fileattr || attributelist.dirattr)
5891                 action |= KAUTH_VNODE_SEARCH;
5892
5893         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
5894                 u_long ulcount = count;
5895
5896                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
5897                                          count,
5898                                          uap->options, (unsigned long *)&newstate, &eofflag,
5899                                          &ulcount, ctx);
5900                 if (!error)
5901                         count = ulcount;
5902         }
5903         (void)vnode_put(vp);
5904
5905         if (error)
5906                 goto out;
5907         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
5908
5909         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
5910                 goto out;
5911         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
5912                 goto out;
5913         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
5914                 goto out;
5915
5916         *retval = eofflag;  /* similar to getdirentries */
5917         error = 0;
5918 out:
5919         file_drop(fd);
5920         return (error); /* return error earlier, an retval of 0 or 1 now */
5921
5922 } /* end of getdirentryattr system call */
5923
5924 /*
5925 * Exchange data between two files
5926 */
5927
5928 /* ARGSUSED */
5929 int
5930 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused register_t *retval)
5931 {
5932
5933         struct nameidata fnd, snd;
5934         vfs_context_t ctx = vfs_context_current();
5935         vnode_t fvp;
5936         vnode_t svp;
5937         int error;
5938         u_long nameiflags;
5939         char *fpath = NULL;
5940         char *spath = NULL;
5941         int   flen, slen;
5942         fse_info f_finfo, s_finfo;
5943
5944         nameiflags = 0;
5945         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
5946
5947     NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
5948                 UIO_USERSPACE, uap->path1, ctx);
5949
5950     error = namei(&fnd);
5951     if (error)
5952         goto out2;
5953
5954         nameidone(&fnd);
5955         fvp = fnd.ni_vp;
5956
5957     NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
5958                 UIO_USERSPACE, uap->path2, ctx);
5959
5960     error = namei(&snd);
5961     if (error) {
5962                 vnode_put(fvp);
5963                 goto out2;
5964     }
5965         nameidone(&snd);
5966         svp = snd.ni_vp;
5967
5968         /*
5969          * if the files are the same, return an inval error
5970          */
5971         if (svp == fvp) {
5972                 error = EINVAL;
5973                 goto out;
5974         }
5975
5976         /*
5977          * if the files are on different volumes, return an error
5978          */
5979         if (svp->v_mount != fvp->v_mount) {
5980                 error = EXDEV;
5981                 goto out;
5982         }
5983
5984 #if CONFIG_MACF
5985         error = mac_vnode_check_exchangedata(ctx,
5986             fvp, svp);
5987         if (error)
5988                 goto out;
5989 #endif
5990         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
5991             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
5992                 goto out;
5993
5994         if (
5995 #if CONFIG_FSE
5996         need_fsevent(FSE_EXCHANGE, fvp) ||
5997 #endif
5998         kauth_authorize_fileop_has_listeners()) {
5999                 GET_PATH(fpath);
6000                 GET_PATH(spath);
6001                 if (fpath == NULL || spath == NULL) {
6002                         error = ENOMEM;
6003                         goto out;
6004                 }
6005                 flen = MAXPATHLEN;
6006                 slen = MAXPATHLEN;
6007                 if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') {
6008                         printf("exchange: vn_getpath(fvp=%p) failed <<%s>>\n",
6009                                fvp, fpath);
6010                 }
6011                 if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') {
6012                         printf("exchange: vn_getpath(svp=%p) failed <<%s>>\n",
6013                                svp, spath);
6014                 }
6015 #if CONFIG_FSE
6016                 get_fse_info(fvp, &f_finfo, ctx);
6017                 get_fse_info(svp, &s_finfo, ctx);
6018 #endif
6019         }
6020         /* Ok, make the call */
6021         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6022
6023         if (error == 0) {
6024             const char *tmpname;
6025
6026             if (fpath != NULL && spath != NULL) {
6027                     /* call out to allow 3rd party notification of exchangedata.
6028                      * Ignore result of kauth_authorize_fileop call.
6029                      */
6030                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
6031                                            (uintptr_t)fpath, (uintptr_t)spath);
6032             }
6033             name_cache_lock();
6034
6035             tmpname     = fvp->v_name;
6036             fvp->v_name = svp->v_name;
6037             svp->v_name = tmpname;
6038
6039             if (fvp->v_parent != svp->v_parent) {
6040                 vnode_t tmp;
6041
6042                 tmp           = fvp->v_parent;
6043                 fvp->v_parent = svp->v_parent;
6044                 svp->v_parent = tmp;
6045             }
6046             name_cache_unlock();
6047
6048 #if CONFIG_FSE
6049             if (fpath != NULL && spath != NULL) {
6050                     add_fsevent(FSE_EXCHANGE, ctx,
6051                                 FSE_ARG_STRING, flen, fpath,
6052                                 FSE_ARG_FINFO, &f_finfo,
6053                                 FSE_ARG_STRING, slen, spath,
6054                                 FSE_ARG_FINFO, &s_finfo,
6055                                 FSE_ARG_DONE);
6056             }
6057 #endif
6058         }
6059
6060 out:
6061         if (fpath != NULL)
6062                 RELEASE_PATH(fpath);
6063         if (spath != NULL)
6064                 RELEASE_PATH(spath);
6065         vnode_put(svp);
6066         vnode_put(fvp);
6067 out2:
6068         return (error);
6069 }
6070
6071
6072 /* ARGSUSED */
6073
6074 int
6075 searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval)
6076 {
6077         vnode_t vp;
6078         int error=0;
6079         int fserror = 0;
6080         struct nameidata nd;
6081         struct user_fssearchblock searchblock;
6082         struct searchstate *state;
6083         struct attrlist *returnattrs;
6084         void *searchparams1,*searchparams2;
6085         uio_t auio = NULL;
6086         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6087         u_long nummatches;
6088         int mallocsize;
6089         u_long nameiflags;
6090         vfs_context_t ctx = vfs_context_current();
6091         char uio_buf[ UIO_SIZEOF(1) ];
6092
6093         /* Start by copying in fsearchblock paramater list */
6094     if (IS_64BIT_PROCESS(p)) {
6095        error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
6096     }
6097     else {
6098         struct fssearchblock tmp_searchblock;
6099         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
6100         // munge into 64-bit version
6101         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
6102         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
6103         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
6104         searchblock.maxmatches = tmp_searchblock.maxmatches;
6105         searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec;
6106         searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec;
6107         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
6108         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
6109         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
6110         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
6111         searchblock.searchattrs = tmp_searchblock.searchattrs;
6112     }
6113         if (error)
6114                 return(error);
6115
6116         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
6117          */
6118         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
6119                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
6120                 return(EINVAL);
6121
6122         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
6123         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
6124         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
6125         /* block.                                                                                             */
6126
6127         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
6128                       sizeof(struct attrlist) + sizeof(struct searchstate);
6129
6130         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
6131
6132         /* Now set up the various pointers to the correct place in our newly allocated memory */
6133
6134         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
6135         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
6136         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
6137
6138         /* Now copy in the stuff given our local variables. */
6139
6140         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
6141                 goto freeandexit;
6142
6143         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
6144                 goto freeandexit;
6145
6146         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
6147                 goto freeandexit;
6148
6149         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
6150                 goto freeandexit;
6151
6152         /* set up the uio structure which will contain the users return buffer */
6153
6154         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
6155                                                                   &uio_buf[0], sizeof(uio_buf));
6156     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
6157
6158         nameiflags = 0;
6159         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6160         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
6161                 UIO_USERSPACE, uap->path, ctx);
6162
6163         error = namei(&nd);
6164         if (error)
6165                 goto freeandexit;
6166
6167         nameidone(&nd);
6168         vp = nd.ni_vp;
6169
6170
6171         /*
6172          * If searchblock.maxmatches == 0, then skip the search. This has happened
6173          * before and sometimes the underlyning code doesnt deal with it well.
6174          */
6175          if (searchblock.maxmatches == 0) {
6176                 nummatches = 0;
6177                 goto saveandexit;
6178          }
6179
6180         /*
6181            Allright, we have everything we need, so lets make that call.
6182
6183            We keep special track of the return value from the file system:
6184            EAGAIN is an acceptable error condition that shouldn't keep us
6185            from copying out any results...
6186          */
6187
6188         fserror = VNOP_SEARCHFS(vp,
6189                                                         searchparams1,
6190                                                         searchparams2,
6191                                                         &searchblock.searchattrs,
6192                                                         searchblock.maxmatches,
6193                                                         &searchblock.timelimit,
6194                                                         returnattrs,
6195                                                         &nummatches,
6196                                                         uap->scriptcode,
6197                                                         uap->options,
6198                                                         auio,
6199                                                         state,
6200                                                         ctx);
6201
6202 saveandexit:
6203
6204         vnode_put(vp);
6205
6206         /* Now copy out the stuff that needs copying out. That means the number of matches, the
6207            search state.  Everything was already put into he return buffer by the vop call. */
6208
6209         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
6210                 goto freeandexit;
6211
6212     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6213                 goto freeandexit;
6214
6215         error = fserror;
6216
6217 freeandexit:
6218
6219         FREE(searchparams1,M_TEMP);
6220
6221         return(error);
6222
6223
6224 } /* end of searchfs system call */
6225
6226
6227 /*
6228  * Make a filesystem-specific control call:
6229  */
6230 /* ARGSUSED */
6231 int
6232 fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval)
6233 {
6234         int error;
6235         boolean_t is64bit;
6236         struct nameidata nd;
6237         u_long nameiflags;
6238         u_long cmd = uap->cmd;
6239         u_int size;
6240 #define STK_PARAMS 128
6241         char stkbuf[STK_PARAMS];
6242         caddr_t data, memp;
6243         vfs_context_t ctx = vfs_context_current();
6244
6245         size = IOCPARM_LEN(cmd);
6246         if (size > IOCPARM_MAX) return (EINVAL);
6247
6248     is64bit = proc_is64bit(p);
6249
6250         memp = NULL;
6251         if (size > sizeof (stkbuf)) {
6252                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
6253                 data = memp;
6254         } else {
6255                 data = &stkbuf[0];
6256         };
6257
6258         if (cmd & IOC_IN) {
6259                 if (size) {
6260                         error = copyin(uap->data, data, size);
6261                         if (error) goto FSCtl_Exit;
6262                 } else {
6263                     if (is64bit) {
6264                         *(user_addr_t *)data = uap->data;
6265                     }
6266                     else {
6267                         *(uint32_t *)data = (uint32_t)uap->data;
6268                     }
6269                 };
6270         } else if ((cmd & IOC_OUT) && size) {
6271                 /*
6272                  * Zero the buffer so the user always
6273                  * gets back something deterministic.
6274                  */
6275                 bzero(data, size);
6276         } else if (cmd & IOC_VOID) {
6277         if (is64bit) {
6278             *(user_addr_t *)data = uap->data;
6279         }
6280         else {
6281             *(uint32_t *)data = (uint32_t)uap->data;
6282         }
6283         }
6284
6285         /* Get the vnode for the file we are getting info on:  */
6286         nameiflags = 0;
6287         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6288         NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx);
6289         if ((error = namei(&nd))) goto FSCtl_Exit;
6290
6291 #if CONFIG_MACF
6292         error = mac_mount_check_fsctl(ctx, vnode_mount(nd.ni_vp), cmd);
6293         if (error) {
6294                 vnode_put(nd.ni_vp);
6295                 nameidone(&nd);
6296                 goto FSCtl_Exit;
6297         }
6298 #endif
6299
6300         /* Invoke the filesystem-specific code */
6301         error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, ctx);
6302
6303         vnode_put(nd.ni_vp);
6304         nameidone(&nd);
6305
6306         /*
6307          * Copy any data to user, size was
6308          * already set and checked above.
6309          */
6310         if (error == 0 && (cmd & IOC_OUT) && size)
6311                 error = copyout(data, uap->data, size);
6312
6313 FSCtl_Exit:
6314         if (memp) kfree(memp, size);
6315
6316         return error;
6317 }
6318 /* end of fsctl system call */
6319
6320 /*
6321  * An in-kernel sync for power management to call.
6322  */
6323 __private_extern__ int
6324 sync_internal(void)
6325 {
6326         int error;
6327
6328         struct sync_args data;
6329
6330         int retval[2];
6331
6332
6333         error = sync(current_proc(), &data, &retval[0]);
6334
6335
6336         return (error);
6337 } /* end of sync_internal call */
6338
6339
6340 /*
6341  *  Retrieve the data of an extended attribute.
6342  */
6343 int
6344 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
6345 {
6346         vnode_t vp;
6347         struct nameidata nd;
6348         char attrname[XATTR_MAXNAMELEN+1];
6349         vfs_context_t ctx = vfs_context_current();
6350         uio_t auio = NULL;
6351         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6352         size_t attrsize = 0;
6353         size_t namelen;
6354         u_long nameiflags;
6355         int error;
6356         char uio_buf[ UIO_SIZEOF(1) ];
6357
6358         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6359                 return (EINVAL);
6360
6361         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6362         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6363         if ((error = namei(&nd))) {
6364                 return (error);
6365         }
6366         vp = nd.ni_vp;
6367         nameidone(&nd);
6368
6369         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6370                 goto out;
6371         }
6372         if (xattr_protected(attrname)) {
6373                 error = EPERM;
6374                 goto out;
6375         }
6376         if (uap->value && uap->size > 0) {
6377                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6378                                             &uio_buf[0], sizeof(uio_buf));
6379                 uio_addiov(auio, uap->value, uap->size);
6380         }
6381
6382         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
6383 out:
6384         vnode_put(vp);
6385
6386         if (auio) {
6387                 *retval = uap->size - uio_resid(auio);
6388         } else {
6389                 *retval = (user_ssize_t)attrsize;
6390         }
6391
6392         return (error);
6393 }
6394
6395 /*
6396  * Retrieve the data of an extended attribute.
6397  */
6398 int
6399 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
6400 {
6401         vnode_t vp;
6402         char attrname[XATTR_MAXNAMELEN+1];
6403         uio_t auio = NULL;
6404         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6405         size_t attrsize = 0;
6406         size_t namelen;
6407         int error;
6408         char uio_buf[ UIO_SIZEOF(1) ];
6409
6410         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6411                 return (EINVAL);
6412
6413         if ( (error = file_vnode(uap->fd, &vp)) ) {
6414                 return (error);
6415         }
6416         if ( (error = vnode_getwithref(vp)) ) {
6417                 file_drop(uap->fd);
6418                 return(error);
6419         }
6420         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6421                 goto out;
6422         }
6423         if (xattr_protected(attrname)) {
6424                 error = EPERM;
6425                 goto out;
6426         }
6427         if (uap->value && uap->size > 0) {
6428                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6429                                             &uio_buf[0], sizeof(uio_buf));
6430                 uio_addiov(auio, uap->value, uap->size);
6431         }
6432
6433         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
6434 out:
6435         (void)vnode_put(vp);
6436         file_drop(uap->fd);
6437
6438         if (auio) {
6439                 *retval = uap->size - uio_resid(auio);
6440         } else {
6441                 *retval = (user_ssize_t)attrsize;
6442         }
6443         return (error);
6444 }
6445
6446 /*
6447  * Set the data of an extended attribute.
6448  */
6449 int
6450 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
6451 {
6452         vnode_t vp;
6453         struct nameidata nd;
6454         char attrname[XATTR_MAXNAMELEN+1];
6455         vfs_context_t ctx = vfs_context_current();
6456         uio_t auio = NULL;
6457         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6458         size_t namelen;
6459         u_long nameiflags;
6460         int error;
6461         char uio_buf[ UIO_SIZEOF(1) ];
6462
6463         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6464                 return (EINVAL);
6465
6466         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6467                 return (error);
6468         }
6469         if (xattr_protected(attrname))
6470                 return(EPERM);
6471         if (uap->size != 0 && uap->value == 0) {
6472                 return (EINVAL);
6473         }
6474
6475         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6476         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6477         if ((error = namei(&nd))) {
6478                 return (error);
6479         }
6480         vp = nd.ni_vp;
6481         nameidone(&nd);
6482
6483         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6484                                     &uio_buf[0], sizeof(uio_buf));
6485         uio_addiov(auio, uap->value, uap->size);
6486
6487         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
6488 #if CONFIG_FSE
6489         if (error == 0) {
6490                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
6491                     FSE_ARG_VNODE, vp,
6492                     FSE_ARG_DONE);
6493         }
6494 #endif
6495         vnode_put(vp);
6496         *retval = 0;
6497         return (error);
6498 }
6499
6500 /*
6501  * Set the data of an extended attribute.
6502  */
6503 int
6504 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
6505 {
6506         vnode_t vp;
6507         char attrname[XATTR_MAXNAMELEN+1];
6508         uio_t auio = NULL;
6509         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6510         size_t namelen;
6511         int error;
6512         char uio_buf[ UIO_SIZEOF(1) ];
6513         vfs_context_t ctx = vfs_context_current();
6514
6515         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6516                 return (EINVAL);
6517
6518         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6519                 return (error);
6520         }
6521         if (xattr_protected(attrname))
6522                 return(EPERM);
6523         if (uap->size != 0 && uap->value == 0) {
6524                 return (EINVAL);
6525         }
6526         if ( (error = file_vnode(uap->fd, &vp)) ) {
6527                 return (error);
6528         }
6529         if ( (error = vnode_getwithref(vp)) ) {
6530                 file_drop(uap->fd);
6531                 return(error);
6532         }
6533         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6534                                     &uio_buf[0], sizeof(uio_buf));
6535         uio_addiov(auio, uap->value, uap->size);
6536
6537         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
6538 #if CONFIG_FSE
6539         if (error == 0) {
6540                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
6541                     FSE_ARG_VNODE, vp,
6542                     FSE_ARG_DONE);
6543         }
6544 #endif
6545         vnode_put(vp);
6546         file_drop(uap->fd);
6547         *retval = 0;
6548         return (error);
6549 }
6550
6551 /*
6552  * Remove an extended attribute.
6553  */
6554 #warning "code duplication"
6555 int
6556 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
6557 {
6558         vnode_t vp;
6559         struct nameidata nd;
6560         char attrname[XATTR_MAXNAMELEN+1];
6561         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6562         vfs_context_t ctx = vfs_context_current();
6563         size_t namelen;
6564         u_long nameiflags;
6565         int error;
6566
6567         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6568                 return (EINVAL);
6569
6570         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6571         if (error != 0) {
6572                 return (error);
6573         }
6574         if (xattr_protected(attrname))
6575                 return(EPERM);
6576         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6577         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6578         if ((error = namei(&nd))) {
6579                 return (error);
6580         }
6581         vp = nd.ni_vp;
6582         nameidone(&nd);
6583
6584         error = vn_removexattr(vp, attrname, uap->options, ctx);
6585 #if CONFIG_FSE
6586         if (error == 0) {
6587                 add_fsevent(FSE_XATTR_REMOVED, ctx,
6588                     FSE_ARG_VNODE, vp,
6589                     FSE_ARG_DONE);
6590         }
6591 #endif
6592         vnode_put(vp);
6593         *retval = 0;
6594         return (error);
6595 }
6596
6597 /*
6598  * Remove an extended attribute.
6599  */
6600 #warning "code duplication"
6601 int
6602 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
6603 {
6604         vnode_t vp;
6605         char attrname[XATTR_MAXNAMELEN+1];
6606         size_t namelen;
6607         int error;
6608         vfs_context_t ctx = vfs_context_current();
6609
6610         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6611                 return (EINVAL);
6612
6613         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6614         if (error != 0) {
6615                 return (error);
6616         }
6617         if (xattr_protected(attrname))
6618                 return(EPERM);
6619         if ( (error = file_vnode(uap->fd, &vp)) ) {
6620                 return (error);
6621         }
6622         if ( (error = vnode_getwithref(vp)) ) {
6623                 file_drop(uap->fd);
6624                 return(error);
6625         }
6626
6627         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
6628 #if CONFIG_FSE
6629         if (error == 0) {
6630                 add_fsevent(FSE_XATTR_REMOVED, ctx,
6631                     FSE_ARG_VNODE, vp,
6632                     FSE_ARG_DONE);
6633         }
6634 #endif
6635         vnode_put(vp);
6636         file_drop(uap->fd);
6637         *retval = 0;
6638         return (error);
6639 }
6640
6641 /*
6642  * Retrieve the list of extended attribute names.
6643  */
6644 #warning "code duplication"
6645 int
6646 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
6647 {
6648         vnode_t vp;
6649         struct nameidata nd;
6650         vfs_context_t ctx = vfs_context_current();
6651         uio_t auio = NULL;
6652         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6653         size_t attrsize = 0;
6654         u_long nameiflags;
6655         int error;
6656         char uio_buf[ UIO_SIZEOF(1) ];
6657
6658         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6659                 return (EINVAL);
6660
6661         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
6662         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6663         if ((error = namei(&nd))) {
6664                 return (error);
6665         }
6666         vp = nd.ni_vp;
6667         nameidone(&nd);
6668         if (uap->namebuf != 0 && uap->bufsize > 0) {
6669                 // LP64todo - fix this!
6670                 auio = uio_createwithbuffer(1, 0, spacetype,
6671                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
6672                 uio_addiov(auio, uap->namebuf, uap->bufsize);
6673         }
6674
6675         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
6676
6677         vnode_put(vp);
6678         if (auio) {
6679                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6680         } else {
6681                 *retval = (user_ssize_t)attrsize;
6682         }
6683         return (error);
6684 }
6685
6686 /*
6687  * Retrieve the list of extended attribute names.
6688  */
6689 #warning "code duplication"
6690 int
6691 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
6692 {
6693         vnode_t vp;
6694         uio_t auio = NULL;
6695         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6696         size_t attrsize = 0;
6697         int error;
6698         char uio_buf[ UIO_SIZEOF(1) ];
6699
6700         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6701                 return (EINVAL);
6702
6703         if ( (error = file_vnode(uap->fd, &vp)) ) {
6704                 return (error);
6705         }
6706         if ( (error = vnode_getwithref(vp)) ) {
6707                 file_drop(uap->fd);
6708                 return(error);
6709         }
6710         if (uap->namebuf != 0 && uap->bufsize > 0) {
6711                 // LP64todo - fix this!
6712                 auio = uio_createwithbuffer(1, 0, spacetype,
6713                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
6714                 uio_addiov(auio, uap->namebuf, uap->bufsize);
6715         }
6716
6717         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
6718
6719         vnode_put(vp);
6720         file_drop(uap->fd);
6721         if (auio) {
6722                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6723         } else {
6724                 *retval = (user_ssize_t)attrsize;
6725         }
6726         return (error);
6727 }
6728
6729 /*
6730  * Common routine to handle various flavors of statfs data heading out
6731  *      to user space.
6732  *
6733  * Returns:     0                       Success
6734  *              EFAULT
6735  */
6736 static int
6737 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
6738     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
6739     boolean_t partial_copy)
6740 {
6741         int             error;
6742         int             my_size, copy_size;
6743
6744         if (is_64_bit) {
6745                 struct user_statfs sfs;
6746                 my_size = copy_size = sizeof(sfs);
6747                 bzero(&sfs, my_size);
6748                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6749                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
6750                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6751                 sfs.f_bsize = (user_long_t)sfsp->f_bsize;
6752                 sfs.f_iosize = (user_long_t)sfsp->f_iosize;
6753                 sfs.f_blocks = (user_long_t)sfsp->f_blocks;
6754                 sfs.f_bfree = (user_long_t)sfsp->f_bfree;
6755                 sfs.f_bavail = (user_long_t)sfsp->f_bavail;
6756                 sfs.f_files = (user_long_t)sfsp->f_files;
6757                 sfs.f_ffree = (user_long_t)sfsp->f_ffree;
6758                 sfs.f_fsid = sfsp->f_fsid;
6759                 sfs.f_owner = sfsp->f_owner;
6760                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6761                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6762                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6763
6764                 if (partial_copy) {
6765                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6766                 }
6767                 error = copyout((caddr_t)&sfs, bufp, copy_size);
6768         }
6769         else {
6770                 struct statfs sfs;
6771                 my_size = copy_size = sizeof(sfs);
6772                 bzero(&sfs, my_size);
6773
6774                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6775                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
6776                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6777
6778                 /*
6779                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
6780                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
6781                  * to reflect the filesystem size as best we can.
6782                  */
6783                 if ((sfsp->f_blocks > LONG_MAX)
6784                         /* Hack for 4061702 . I think the real fix is for Carbon to
6785                          * look for some volume capability and not depend on hidden
6786                          * semantics agreed between a FS and carbon.
6787                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
6788                          * for Carbon to set bNoVolumeSizes volume attribute.
6789                          * Without this the webdavfs files cannot be copied onto
6790                          * disk as they look huge. This change should not affect
6791                          * XSAN as they should not setting these to -1..
6792                          */
6793                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
6794                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
6795                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
6796                         int             shift;
6797
6798                         /*
6799                          * Work out how far we have to shift the block count down to make it fit.
6800                          * Note that it's possible to have to shift so far that the resulting
6801                          * blocksize would be unreportably large.  At that point, we will clip
6802                          * any values that don't fit.
6803                          *
6804                          * For safety's sake, we also ensure that f_iosize is never reported as
6805                          * being smaller than f_bsize.
6806                          */
6807                         for (shift = 0; shift < 32; shift++) {
6808                                 if ((sfsp->f_blocks >> shift) <= LONG_MAX)
6809                                         break;
6810                                 if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX)
6811                                         break;
6812                         }
6813 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s)))
6814                         sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
6815                         sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
6816                         sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
6817 #undef __SHIFT_OR_CLIP
6818                         sfs.f_bsize = (long)(sfsp->f_bsize << shift);
6819                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
6820                 } else {
6821                         /* filesystem is small enough to be reported honestly */
6822                         sfs.f_bsize = (long)sfsp->f_bsize;
6823                         sfs.f_iosize = (long)sfsp->f_iosize;
6824                         sfs.f_blocks = (long)sfsp->f_blocks;
6825                         sfs.f_bfree = (long)sfsp->f_bfree;
6826                         sfs.f_bavail = (long)sfsp->f_bavail;
6827                 }
6828                 sfs.f_files = (long)sfsp->f_files;
6829                 sfs.f_ffree = (long)sfsp->f_ffree;
6830                 sfs.f_fsid = sfsp->f_fsid;
6831                 sfs.f_owner = sfsp->f_owner;
6832                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6833                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6834                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6835
6836                 if (partial_copy) {
6837                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6838                 }
6839                 error = copyout((caddr_t)&sfs, bufp, copy_size);
6840         }
6841
6842         if (sizep != NULL) {
6843                 *sizep = my_size;
6844         }
6845         return(error);
6846 }
6847
6848 /*
6849  * copy stat structure into user_stat structure.
6850  */
6851 void munge_stat(struct stat *sbp, struct user_stat *usbp)
6852 {
6853         bzero(usbp, sizeof(struct user_stat));
6854
6855         usbp->st_dev = sbp->st_dev;
6856         usbp->st_ino = sbp->st_ino;
6857         usbp->st_mode = sbp->st_mode;
6858         usbp->st_nlink = sbp->st_nlink;
6859         usbp->st_uid = sbp->st_uid;
6860         usbp->st_gid = sbp->st_gid;
6861         usbp->st_rdev = sbp->st_rdev;
6862 #ifndef _POSIX_C_SOURCE
6863         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
6864         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
6865         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
6866         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
6867         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
6868         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
6869 #else
6870         usbp->st_atime = sbp->st_atime;
6871         usbp->st_atimensec = sbp->st_atimensec;
6872         usbp->st_mtime = sbp->st_mtime;
6873         usbp->st_mtimensec = sbp->st_mtimensec;
6874         usbp->st_ctime = sbp->st_ctime;
6875         usbp->st_ctimensec = sbp->st_ctimensec;
6876 #endif
6877         usbp->st_size = sbp->st_size;
6878         usbp->st_blocks = sbp->st_blocks;
6879         usbp->st_blksize = sbp->st_blksize;
6880         usbp->st_flags = sbp->st_flags;
6881         usbp->st_gen = sbp->st_gen;
6882         usbp->st_lspare = sbp->st_lspare;
6883         usbp->st_qspare[0] = sbp->st_qspare[0];
6884         usbp->st_qspare[1] = sbp->st_qspare[1];
6885 }
6886
6887 /*
6888  * copy stat64 structure into user_stat64 structure.
6889  */
6890 void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp)
6891 {
6892         bzero(usbp, sizeof(struct user_stat));
6893
6894         usbp->st_dev = sbp->st_dev;
6895         usbp->st_ino = sbp->st_ino;
6896         usbp->st_mode = sbp->st_mode;
6897         usbp->st_nlink = sbp->st_nlink;
6898         usbp->st_uid = sbp->st_uid;
6899         usbp->st_gid = sbp->st_gid;
6900         usbp->st_rdev = sbp->st_rdev;
6901 #ifndef _POSIX_C_SOURCE
6902         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
6903         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
6904         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
6905         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
6906         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
6907         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
6908         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
6909         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
6910 #else
6911         usbp->st_atime = sbp->st_atime;
6912         usbp->st_atimensec = sbp->st_atimensec;
6913         usbp->st_mtime = sbp->st_mtime;
6914         usbp->st_mtimensec = sbp->st_mtimensec;
6915         usbp->st_ctime = sbp->st_ctime;
6916         usbp->st_ctimensec = sbp->st_ctimensec;
6917         usbp->st_birthtime = sbp->st_birthtime;
6918         usbp->st_birthtimensec = sbp->st_birthtimensec;
6919 #endif
6920         usbp->st_size = sbp->st_size;
6921         usbp->st_blocks = sbp->st_blocks;
6922         usbp->st_blksize = sbp->st_blksize;
6923         usbp->st_flags = sbp->st_flags;
6924         usbp->st_gen = sbp->st_gen;
6925         usbp->st_lspare = sbp->st_lspare;
6926         usbp->st_qspare[0] = sbp->st_qspare[0];
6927         usbp->st_qspare[1] = sbp->st_qspare[1];
6928 }