bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/sysproto.h>
  96 #include <sys/xattr.h>
  97 #include <sys/ubc_internal.h>
  98 #include <machine/cons.h>
  99 #include <machine/limits.h>
 100 #include <miscfs/specfs/specdev.h>
 101 #include <miscfs/union/union.h>
 102
 103 #include <bsm/audit_kernel.h>
 104 #include <bsm/audit_kevents.h>
 105
 106 #include <mach/mach_types.h>
 107 #include <kern/kern_types.h>
 108 #include <kern/kalloc.h>
 109
 110 #include <vm/vm_pageout.h>
 111
 112 #include <libkern/OSAtomic.h>
 113
 114 #if CONFIG_MACF
 115 #include <security/mac.h>
 116 #include <security/mac_framework.h>
 117 #endif
 118
 119 #if CONFIG_FSE /* presumably fsevents support (confirm): buffers come from get_pathbuff()/release_pathbuff() */
 120 #define GET_PATH(x) \
 121         (x) = get_pathbuff(); /* NOTE: the trailing ';' is part of the expansion */
 122 #define RELEASE_PATH(x) \
 123         release_pathbuff(x); /* NOTE: the trailing ';' is part of the expansion */
 124 #else /* no CONFIG_FSE: allocate/free a MAXPATHLEN-byte buffer from the M_NAMEI zone */
 125 #define GET_PATH(x)     \
 126         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK); /* expansion ends in ';' here too */
 127 #define RELEASE_PATH(x) \
 128         FREE_ZONE((x), MAXPATHLEN, M_NAMEI); /* size must match the MAXPATHLEN used by GET_PATH */
 129 #endif /* CONFIG_FSE */
 130
 131 /* Arguments for the checkdirs iteration (presumably handed to checkdirs_callback() via its void *arg -- confirm). */
 132 struct cdirargs {
 133         vnode_t olddp; /* presumably the directory vnode being replaced, cf. checkdirs(olddp, ...) -- confirm */
 134         vnode_t newdp; /* presumably the directory vnode that takes olddp's place -- confirm in checkdirs() */
 135 };
 136 /* Callback for the checkdirs iteration; receives a process and an opaque argument. */
 137 static int checkdirs_callback(proc_t p, void * arg);
 138 /* File-local helpers (static) plus a few non-static symbols defined in this file. */
 139 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
 140 static int checkdirs(vnode_t olddp, vfs_context_t ctx); /* __mac_mount() calls this on the covered vnode after a successful mount */
 141 void enablequotas(struct mount *mp, vfs_context_t ctx); /* not static; __mac_mount() calls it after a successful mount or update */
 142 static int getfsstat_callback(mount_t mp, void * arg);
 143 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
 144 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
 145 static int sync_callback(mount_t, void *);
 146 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 147                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
 148                                                 boolean_t partial_copy);
 149 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp);
 150 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t); /* function-pointer object defined here (no extern); not assigned in this part of the file */
 151 /* The declarations below carry __private_extern__ rather than static. */
 152 __private_extern__
 153 int sync_internal(void);
 154
 155 __private_extern__
 156 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *);
 157
 158 __private_extern__
 159 int unlink1(vfs_context_t, struct nameidata *, int);
 160
 161
 162 #ifdef __APPLE_API_OBSOLETE /* argument structures and prototypes for fstatv/lstatv/mkcomplex/statv */
 163 struct fstatv_args { /* arguments to fstatv() */
 164        int fd;                  /* file descriptor of the target file */
 165        struct vstat *vsb;       /* vstat structure for returned info  */
 166 };
 167 struct lstatv_args { /* arguments to lstatv() */
 168        const char *path;        /* pathname of the target file       */
 169        struct vstat *vsb;       /* vstat structure for returned info */
 170 };
 171 struct mkcomplex_args { /* arguments to mkcomplex() */
 172         const char *path;       /* pathname of the file to be created */
 173                 mode_t mode;            /* access mode for the newly created file */
 174         u_long type;            /* format of the complex file */
 175 };
 176 struct statv_args { /* arguments to statv() */
 177         const char *path;       /* pathname of the target file       */
 178         struct vstat *vsb;      /* vstat structure for returned info */
 179 };
 180 /* Handlers taking the argument structures above; their definitions are not in this part of the file. */
 181 int fstatv(proc_t p, struct fstatv_args *uap, register_t *retval);
 182 int lstatv(proc_t p, struct lstatv_args *uap, register_t *retval);
 183 int mkcomplex(proc_t p, struct mkcomplex_args *uap, register_t *retval);
 184 int statv(proc_t p, struct statv_args *uap, register_t *retval);
 185
 186 #endif /* __APPLE_API_OBSOLETE */
 187
 188 /*
 189  * Incremented each time a mount or unmount operation occurs.
 190  * Used to invalidate the cached value of the rootvp in the
 191  * mount structure utilized by cache_lookup_path.
 192  */
 193 int mount_generation = 0; /* __mac_mount() bumps this between name_cache_lock() and name_cache_unlock() */
 194
 195 /* Counts the number of mount and unmount operations. */
 196 unsigned int vfs_nummntops=0; /* __mac_mount() increments it via OSAddAtomic(), casting the address to SInt32 * */
 197 /* Declared extern: defined outside this part of the file. */
 198 extern struct fileops vnops;
 199 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 200
 201
 202 /*
 203  * Virtual File System System Calls
 204  */
 205
 206 /*
 207  * mount system call: repackage the arguments and defer to __mac_mount() with no MAC label.
 208  */
 209 /* ARGSUSED */
 210 int
 211 mount(proc_t p, struct mount_args *uap, __unused register_t *retval)
 212 {
 213         struct __mac_mount_args mac_args; /* argument block handed to __mac_mount() */
 214
 215         mac_args.mac_p = USER_ADDR_NULL; /* this entry point supplies no label */
 216         mac_args.data = uap->data;
 217         mac_args.flags = uap->flags;
 218         mac_args.path = uap->path;
 219         mac_args.type = uap->type;
 220         return __mac_mount(p, &mac_args, retval);
 221 }
 222
 223 int
 224 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused register_t *retval)
 225 {
 226         struct vnode *vp;
 227         struct vnode *devvp = NULLVP;
 228         struct vnode *device_vnode = NULLVP;
 229 #if CONFIG_MACF
 230         struct vnode *rvp;
 231 #endif
 232         struct mount *mp;
 233         struct vfstable *vfsp = (struct vfstable *)0;
 234         int error, flag = 0;
 235         struct vnode_attr va;
 236         vfs_context_t ctx = vfs_context_current();
 237         struct nameidata nd;
 238         struct nameidata nd1;
 239         char fstypename[MFSNAMELEN];
 240         size_t dummy=0;
 241         user_addr_t devpath = USER_ADDR_NULL;
 242         user_addr_t fsmountargs =  uap->data;
 243         int ronly = 0;
 244         int mntalloc = 0;
 245         mode_t accessmode;
 246         boolean_t is_64bit;
 247         boolean_t is_rwlock_locked = FALSE;
 248
 249         AUDIT_ARG(fflags, uap->flags);
 250
 251         is_64bit = proc_is64bit(p);
 252
 253         /*
 254          * Get vnode to be covered
 255          */
 256         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
 257                    UIO_USERSPACE, uap->path, ctx);
 258         error = namei(&nd);
 259         if (error)
 260                 return (error);
 261         vp = nd.ni_vp;
 262
 263         if ((vp->v_flag & VROOT) &&
 264                 (vp->v_mount->mnt_flag & MNT_ROOTFS))
 265                         uap->flags |= MNT_UPDATE;
 266
 267         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 268         if (error)
 269                 goto out1;
 270
 271         if (uap->flags & MNT_UPDATE) {
 272                 if ((vp->v_flag & VROOT) == 0) {
 273                         error = EINVAL;
 274                         goto out1;
 275                 }
 276                 mp = vp->v_mount;
 277
 278                 /* unmount in progress return error */
 279                 mount_lock(mp);
 280                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 281                         mount_unlock(mp);
 282                         error = EBUSY;
 283                         goto out1;
 284                 }
 285                 mount_unlock(mp);
 286                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 287                 is_rwlock_locked = TRUE;
 288                 /*
 289                  * We only allow the filesystem to be reloaded if it
 290                  * is currently mounted read-only.
 291                  */
 292                 if ((uap->flags & MNT_RELOAD) &&
 293                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 294                         error = ENOTSUP;
 295                         goto out1;
 296                 }
 297                 /*
 298                  * Only root, or the user that did the original mount is
 299                  * permitted to update it.
 300                  */
 301                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 302                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 303                         goto out1;
 304                 }
 305 #if CONFIG_MACF
 306                 error = mac_mount_check_remount(ctx, mp);
 307                 if (error != 0) {
 308                         lck_rw_done(&mp->mnt_rwlock);
 309                         goto out1;
 310                 }
 311 #endif
 312                 /*
 313                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 314                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 315                  */
 316                 if (suser(vfs_context_ucred(ctx), NULL)) {
 317                         uap->flags |= MNT_NOSUID | MNT_NODEV;
 318                         if (mp->mnt_flag & MNT_NOEXEC)
 319                                 uap->flags |= MNT_NOEXEC;
 320                 }
 321                 flag = mp->mnt_flag;
 322
 323                 mp->mnt_flag |=
 324                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 325
 326                 vfsp = mp->mnt_vtable;
 327                 goto update;
 328         }
 329         /*
 330          * If the user is not root, ensure that they own the directory
 331          * onto which we are attempting to mount.
 332          */
 333         VATTR_INIT(&va);
 334         VATTR_WANTED(&va, va_uid);
 335         if ((error = vnode_getattr(vp, &va, ctx)) ||
 336             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 337              (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
 338                 goto out1;
 339         }
 340         /*
 341          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 342          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 343          */
 344         if (suser(vfs_context_ucred(ctx), NULL)) {
 345                 uap->flags |= MNT_NOSUID | MNT_NODEV;
 346                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 347                         uap->flags |= MNT_NOEXEC;
 348         }
 349         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 350                 goto out1;
 351
 352         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 353                 goto out1;
 354
 355         if (vp->v_type != VDIR) {
 356                 error = ENOTDIR;
 357                 goto out1;
 358         }
 359
 360         /* XXXAUDIT: Should we capture the type on the error path as well? */
 361         AUDIT_ARG(text, fstypename);
 362         mount_list_lock();
 363         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 364                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN))
 365                         break;
 366         mount_list_unlock();
 367         if (vfsp == NULL) {
 368                 error = ENODEV;
 369                 goto out1;
 370         }
 371 #if CONFIG_MACF
 372         error = mac_mount_check_mount(ctx, vp,
 373             &nd.ni_cnd, vfsp->vfc_name);
 374         if (error != 0)
 375                 goto out1;
 376 #endif
 377         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 378                 error = EBUSY;
 379                 goto out1;
 380         }
 381         vnode_lock_spin(vp);
 382         SET(vp->v_flag, VMOUNT);
 383         vnode_unlock(vp);
 384
 385         /*
 386          * Allocate and initialize the filesystem.
 387          */
 388         MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount),
 389                 M_MOUNT, M_WAITOK);
 390         bzero((char *)mp, (u_long)sizeof(struct mount));
 391         mntalloc = 1;
 392
 393         /* Initialize the default IO constraints */
 394         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 395         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 396         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 397         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 398         mp->mnt_devblocksize = DEV_BSIZE;
 399         mp->mnt_alignmentmask = PAGE_MASK;
 400         mp->mnt_ioflags = 0;
 401         mp->mnt_realrootvp = NULLVP;
 402         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 403
 404         TAILQ_INIT(&mp->mnt_vnodelist);
 405         TAILQ_INIT(&mp->mnt_workerqueue);
 406         TAILQ_INIT(&mp->mnt_newvnodes);
 407         mount_lock_init(mp);
 408         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 409         is_rwlock_locked = TRUE;
 410         mp->mnt_op = vfsp->vfc_vfsops;
 411         mp->mnt_vtable = vfsp;
 412         mount_list_lock();
 413         vfsp->vfc_refcount++;
 414         mount_list_unlock();
 415         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 416         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 417         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 418         strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 419         mp->mnt_vnodecovered = vp;
 420         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 421
 422         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 423         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 424
 425 update:
 426         /*
 427          * Set the mount level flags.
 428          */
 429         if (uap->flags & MNT_RDONLY)
 430                 mp->mnt_flag |= MNT_RDONLY;
 431         else if (mp->mnt_flag & MNT_RDONLY)
 432                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 433         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 434                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 435                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 436                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 437         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 438                                       MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 439                                       MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 440                                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 441
 442 #if CONFIG_MACF
 443         if (uap->flags & MNT_MULTILABEL) {
 444                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 445                         error = EINVAL;
 446                         goto out1;
 447                 }
 448                 mp->mnt_flag |= MNT_MULTILABEL;
 449         }
 450 #endif
 451
 452         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 453                 if (is_64bit) {
 454                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 455                                 goto out1;
 456                         fsmountargs += sizeof(devpath);
 457                 } else {
 458                         char *tmp;
 459                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 460                                 goto out1;
 461                         /* munge into LP64 addr */
 462                         devpath = CAST_USER_ADDR_T(tmp);
 463                         fsmountargs += sizeof(tmp);
 464                 }
 465
 466                 /* if it is not update and device name needs to be parsed */
 467                 if ((devpath)) {
 468                         NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 469                         if ( (error = namei(&nd1)) )
 470                                 goto out1;
 471
 472                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
 473                         devvp = nd1.ni_vp;
 474
 475                         nameidone(&nd1);
 476
 477                         if (devvp->v_type != VBLK) {
 478                                 error = ENOTBLK;
 479                                 goto out2;
 480                         }
 481                         if (major(devvp->v_rdev) >= nblkdev) {
 482                                 error = ENXIO;
 483                                 goto out2;
 484                         }
 485                         /*
 486                         * If mount by non-root, then verify that user has necessary
 487                         * permissions on the device.
 488                         */
 489                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 490                                 accessmode = KAUTH_VNODE_READ_DATA;
 491                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 492                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 493                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 494                                         goto out2;
 495                         }
 496                 }
 497                 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
 498                         if ( (error = vnode_ref(devvp)) )
 499                                 goto out2;
 500                         /*
 501                         * Disallow multiple mounts of the same device.
 502                         * Disallow mounting of a device that is currently in use
 503                         * (except for root, which might share swap device for miniroot).
 504                         * Flush out any old buffers remaining from a previous use.
 505                         */
 506                         if ( (error = vfs_mountedon(devvp)) )
 507                                 goto out3;
 508
 509                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 510                                 error = EBUSY;
 511                                 goto out3;
 512                         }
 513                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 514                                 error = ENOTBLK;
 515                                 goto out3;
 516                         }
 517                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 518                                 goto out3;
 519
 520                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 521 #if CONFIG_MACF
 522                         error = mac_vnode_check_open(ctx,
 523                             devvp,
 524                             ronly ? FREAD : FREAD|FWRITE);
 525                         if (error)
 526                                 goto out3;
 527 #endif /* MAC */
 528                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 529                                 goto out3;
 530
 531                         mp->mnt_devvp = devvp;
 532                         device_vnode = devvp;
 533                 } else {
 534                         if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 535                                 /*
 536                                  * If upgrade to read-write by non-root, then verify
 537                                  * that user has necessary permissions on the device.
 538                                  */
 539                                 device_vnode = mp->mnt_devvp;
 540                                 if (device_vnode && suser(vfs_context_ucred(ctx), NULL)) {
 541                                         if ((error = vnode_authorize(device_vnode, NULL,
 542                                                  KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)
 543                                                 goto out2;
 544                                 }
 545                         }
 546                         device_vnode = NULLVP;
 547                 }
 548         }
 549 #if CONFIG_MACF
 550         if ((uap->flags & MNT_UPDATE) == 0) {
 551                 mac_mount_label_init(mp);
 552                 mac_mount_label_associate(ctx, mp);
 553         }
 554         if (uap->mac_p != USER_ADDR_NULL) {
 555                 struct user_mac mac;
 556                 char *labelstr = NULL;
 557                 size_t ulen = 0;
 558
 559                 if ((uap->flags & MNT_UPDATE) != 0) {
 560                         error = mac_mount_check_label_update(
 561                             ctx, mp);
 562                         if (error != 0)
 563                                 goto out3;
 564                 }
 565                 if (is_64bit) {
 566                         error = copyin(uap->mac_p, &mac, sizeof(mac));
 567                 } else {
 568                         struct mac mac32;
 569                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 570                         mac.m_buflen = mac32.m_buflen;
 571                         mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
 572                 }
 573                 if (error != 0)
 574                         goto out3;
 575                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 576                     (mac.m_buflen < 2)) {
 577                         error = EINVAL;
 578                         goto out3;
 579                 }
 580                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 581                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 582                 if (error != 0) {
 583                         FREE(labelstr, M_MACTEMP);
 584                         goto out3;
 585                 }
 586                 AUDIT_ARG(mac_string, labelstr);
 587                 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
 588                 FREE(labelstr, M_MACTEMP);
 589                 if (error != 0)
 590                         goto out3;
 591         }
 592 #endif
 593         /*
 594          * Mount the filesystem.
 595          */
 596         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 597
 598         if (uap->flags & MNT_UPDATE) {
 599                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 600                         mp->mnt_flag &= ~MNT_RDONLY;
 601                 mp->mnt_flag &=~
 602                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 603                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 604                 if (error)
 605                         mp->mnt_flag = flag;
 606                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 607                 lck_rw_done(&mp->mnt_rwlock);
 608                 is_rwlock_locked = FALSE;
 609                 if (!error)
 610                         enablequotas(mp, ctx);
 611                 goto out2;
 612         }
 613         /*
 614          * Put the new filesystem on the mount list after root.
 615          */
 616         if (error == 0) {
 617                 struct vfs_attr vfsattr;
 618 #if CONFIG_MACF
 619                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 620                         error = VFS_ROOT(mp, &rvp, ctx);
 621                         if (error) {
 622                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 623                                 goto out3;
 624                         }
 625
 626                         /* VFS_ROOT provides reference so needref = 0 */
 627                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 628                         if (error)
 629                                 goto out3;
 630                 }
 631 #endif  /* MAC */
 632
 633                 vnode_lock_spin(vp);
 634                 CLR(vp->v_flag, VMOUNT);
 635                 vp->v_mountedhere = mp;
 636                 vnode_unlock(vp);
 637
 638                 /*
 639                  * taking the name_cache_lock exclusively will
 640                  * insure that everyone is out of the fast path who
 641                  * might be trying to use a now stale copy of
 642                  * vp->v_mountedhere->mnt_realrootvp
 643                  * bumping mount_generation causes the cached values
 644                  * to be invalidated
 645                  */
 646                 name_cache_lock();
 647                 mount_generation++;
 648                 name_cache_unlock();
 649
 650                 vnode_ref(vp);
 651
 652                 error = checkdirs(vp, ctx);
 653                 if (error != 0)  {
 654                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 655                         goto out4;
 656                 }
 657                 /*
 658                  * there is no cleanup code here so I have made it void
 659                  * we need to revisit this
 660                  */
 661                 (void)VFS_START(mp, 0, ctx);
 662
 663                 mount_list_add(mp);
 664                 lck_rw_done(&mp->mnt_rwlock);
 665                 is_rwlock_locked = FALSE;
 666
 667                 /* Check if this mounted file system supports EAs or named streams. */
 668                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 669                 VFSATTR_INIT(&vfsattr);
 670                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 671                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 672                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 673                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 674                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 675                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 676                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 677                         }
 678 #if NAMEDSTREAMS
 679                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 680                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 681                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 682                         }
 683 #endif
 684                         /* Check if this file system supports path from id lookups. */
 685                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 686                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 687                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 688                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 689                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 690                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 691                         }
 692                 }
 693                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 694                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 695                 }
 696                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 697                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 698                 }
 699                 /* increment the operations count */
 700                 OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
 701                 enablequotas(mp, ctx);
 702
 703                 if (device_vnode) {
 704                         device_vnode->v_specflags |= SI_MOUNTEDON;
 705
 706                         /*
 707                          *   cache the IO attributes for the underlying physical media...
 708                          *   an error return indicates the underlying driver doesn't
 709                          *   support all the queries necessary... however, reasonable
 710                          *   defaults will have been set, so no reason to bail or care
 711                          */
 712                         vfs_init_io_attributes(device_vnode, mp);
 713                 }
 714
 715                 /* Now that mount is setup, notify the listeners */
 716                 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 717         } else {
 718                 vnode_lock_spin(vp);
 719                 CLR(vp->v_flag, VMOUNT);
 720                 vnode_unlock(vp);
 721                 mount_list_lock();
 722                 mp->mnt_vtable->vfc_refcount--;
 723                 mount_list_unlock();
 724
 725                 if (device_vnode ) {
 726                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 727                         vnode_rele(device_vnode);
 728                 }
 729                 lck_rw_done(&mp->mnt_rwlock);
 730                 is_rwlock_locked = FALSE;
 731                 mount_lock_destroy(mp);
 732 #if CONFIG_MACF
 733                 mac_mount_label_destroy(mp);
 734 #endif
 735                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 736         }
 737         nameidone(&nd);
 738
 739         /*
 740          * drop I/O count on covered 'vp' and
 741          * on the device vp if there was one
 742          */
 743         if (devpath && devvp)
 744                 vnode_put(devvp);
 745         vnode_put(vp);
 746
 747         return(error);
 748 out4:
 749         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 750         if (device_vnode != NULLVP) {
 751                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 752                        ctx);
 753
 754         }
 755         vnode_lock_spin(vp);
 756         vp->v_mountedhere = (mount_t) 0;
 757         vnode_unlock(vp);
 758         vnode_rele(vp);
 759 out3:
 760         if (devpath && ((uap->flags & MNT_UPDATE) == 0))
 761                 vnode_rele(devvp);
 762 out2:
 763         if (devpath && devvp)
 764                 vnode_put(devvp);
 765 out1:
 766         /* Release mnt_rwlock only when it was taken */
 767         if (is_rwlock_locked == TRUE) {
 768                 lck_rw_done(&mp->mnt_rwlock);
 769         }
 770         if (mntalloc) {
 771 #if CONFIG_MACF
 772                 mac_mount_label_destroy(mp);
 773 #endif
 774                 mount_list_lock();
 775                 vfsp->vfc_refcount--;
 776                 mount_list_unlock();
 777                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 778         }
 779         vnode_put(vp);
 780         nameidone(&nd);
 781
 782         return(error);
 783 }
 784
 785 void
 786 enablequotas(struct mount *mp, vfs_context_t ctx)
 787 {
 788         struct nameidata qnd;
 789         int type;
 790         char qfpath[MAXPATHLEN];
 791         const char *qfname = QUOTAFILENAME;
 792         const char *qfopsname = QUOTAOPSNAME;
 793         const char *qfextension[] = INITQFNAMES;
 794
 795         /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
 796         if ((strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 )
 797                 && (strncmp( mp->mnt_vfsstat.f_fstypename, "ufs", sizeof("ufs")) != 0))
 798           return;
 799
 800         /*
 801          * Enable filesystem disk quotas if necessary.
 802          * We ignore errors as this should not interfere with final mount
 803          */
 804         for (type=0; type < MAXQUOTAS; type++) {
 805                 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
 806                 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), ctx);
 807                 if (namei(&qnd) != 0)
 808                         continue;           /* option file to trigger quotas is not present */
 809                 vnode_put(qnd.ni_vp);
 810                 nameidone(&qnd);
 811                 snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
 812
 813                 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
 814         }
 815         return;
 816 }
 817
 818
 819 static int
 820 checkdirs_callback(proc_t p, void * arg)
 821 {
 822         struct cdirargs * cdrp = (struct cdirargs * )arg;
 823         vnode_t olddp = cdrp->olddp;
 824         vnode_t newdp = cdrp->newdp;
 825         struct filedesc *fdp;
 826         vnode_t tvp;
 827         vnode_t fdp_cvp;
 828         vnode_t fdp_rvp;
 829         int cdir_changed = 0;
 830         int rdir_changed = 0;
 831
 832         /*
 833          * XXX Also needs to iterate each thread in the process to see if it
 834          * XXX is using a per-thread current working directory, and, if so,
 835          * XXX update that as well.
 836          */
 837
 838         proc_fdlock(p);
 839         fdp = p->p_fd;
 840         if (fdp == (struct filedesc *)0) {
 841                 proc_fdunlock(p);
 842                 return(PROC_RETURNED);
 843         }
 844         fdp_cvp = fdp->fd_cdir;
 845         fdp_rvp = fdp->fd_rdir;
 846         proc_fdunlock(p);
 847
 848         if (fdp_cvp == olddp) {
 849                 vnode_ref(newdp);
 850                 tvp = fdp->fd_cdir;
 851                 fdp_cvp = newdp;
 852                 cdir_changed = 1;
 853                 vnode_rele(tvp);
 854         }
 855         if (fdp_rvp == olddp) {
 856                 vnode_ref(newdp);
 857                 tvp = fdp->fd_rdir;
 858                 fdp_rvp = newdp;
 859                 rdir_changed = 1;
 860                 vnode_rele(tvp);
 861         }
 862         if (cdir_changed || rdir_changed) {
 863                 proc_fdlock(p);
 864                 fdp->fd_cdir = fdp_cvp;
 865                 fdp->fd_rdir = fdp_rvp;
 866                 proc_fdunlock(p);
 867         }
 868         return(PROC_RETURNED);
 869 }
 870
 871
 872
 873 /*
 874  * Scan all active processes to see if any of them have a current
 875  * or root directory onto which the new filesystem has just been
 876  * mounted. If so, replace them with the new mount point.
 877  */
 878 static int
 879 checkdirs(vnode_t olddp, vfs_context_t ctx)
 880 {
 881         vnode_t newdp;
 882         vnode_t tvp;
 883         int err;
 884         struct cdirargs cdr;
 885         struct uthread * uth = get_bsdthread_info(current_thread());
 886
 887         if (olddp->v_usecount == 1)
 888                 return(0);
 889         if (uth != (struct uthread *)0)
 890                 uth->uu_notrigger = 1;
 891         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
 892         if (uth != (struct uthread *)0)
 893                 uth->uu_notrigger = 0;
 894
 895         if (err != 0) {
 896 #if DIAGNOSTIC
 897                 panic("mount: lost mount: error %d", err);
 898 #endif
 899                 return(err);
 900         }
 901
 902         cdr.olddp = olddp;
 903         cdr.newdp = newdp;
 904         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
 905         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
 906
 907         if (rootvnode == olddp) {
 908                 vnode_ref(newdp);
 909                 tvp = rootvnode;
 910                 rootvnode = newdp;
 911                 vnode_rele(tvp);
 912         }
 913
 914         vnode_put(newdp);
 915         return(0);
 916 }
 917
 918 /*
 919  * Unmount a file system.
 920  *
 921  * Note: unmount takes a path to the vnode mounted on as argument,
 922  * not special file (as before).
 923  */
 924 /* ARGSUSED */
 925 int
 926 unmount(__unused proc_t p, struct unmount_args *uap, __unused register_t *retval)
 927 {
 928         vnode_t vp;
 929         struct mount *mp;
 930         int error;
 931         struct nameidata nd;
 932         vfs_context_t ctx = vfs_context_current();
 933
 934         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
 935                 UIO_USERSPACE, uap->path, ctx);
 936         error = namei(&nd);
 937         if (error)
 938                 return (error);
 939         vp = nd.ni_vp;
 940         mp = vp->v_mount;
 941         nameidone(&nd);
 942
 943 #if CONFIG_MACF
 944         error = mac_mount_check_umount(ctx, mp);
 945         if (error != 0) {
 946                 vnode_put(vp);
 947                 return (error);
 948         }
 949 #endif
 950         /*
 951          * Must be the root of the filesystem
 952          */
 953         if ((vp->v_flag & VROOT) == 0) {
 954                 vnode_put(vp);
 955                 return (EINVAL);
 956         }
 957         mount_ref(mp, 0);
 958         vnode_put(vp);
 959         /* safedounmount consumes the mount ref */
 960         return (safedounmount(mp, uap->flags, ctx));
 961 }
 962
 963 int
 964 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
 965 {
 966         mount_t mp;
 967
 968         mp = mount_list_lookupby_fsid(fsid, 0, 1);
 969         if (mp == (mount_t)0) {
 970                 return(ENOENT);
 971         }
 972         mount_ref(mp, 0);
 973         mount_iterdrop(mp);
 974         /* safedounmount consumes the mount ref */
 975         return(safedounmount(mp, flags, ctx));
 976 }
 977
 978
 979 /*
 980  * The mount struct comes with a mount ref which will be consumed.
 981  * Do the actual file system unmount, prevent some common foot shooting.
 982  */
 983 int
 984 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
 985 {
 986         int error;
 987         proc_t p = vfs_context_proc(ctx);
 988
 989         /*
 990          * Only root, or the user that did the original mount is
 991          * permitted to unmount this filesystem.
 992          */
 993         if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
 994             (error = suser(kauth_cred_get(), &p->p_acflag)))
 995                 goto out;
 996
 997         /*
 998          * Don't allow unmounting the root file system.
 999          */
1000         if (mp->mnt_flag & MNT_ROOTFS) {
1001                 error = EBUSY; /* the root is always busy */
1002                 goto out;
1003         }
1004
1005         return (dounmount(mp, flags, 1, ctx));
1006
1007 out:
1008         mount_drop(mp, 0);
1009         return(error);
1010 }
1011
1012 /*
1013  * Do the actual file system unmount.
1014  */
1015 int
1016 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1017 {
1018         vnode_t coveredvp = (vnode_t)0;
1019         int error;
1020         int needwakeup = 0;
1021         int forcedunmount = 0;
1022         int lflags = 0;
1023
1024         if (flags & MNT_FORCE)
1025                 forcedunmount = 1;
1026         mount_lock(mp);
1027         /* XXX post jaguar fix LK_DRAIN - then clean this up */
1028         if ((flags & MNT_FORCE)) {
1029                 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1030                 mp->mnt_lflag |= MNT_LFORCE;
1031         }
1032         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1033                 mp->mnt_lflag |= MNT_LWAIT;
1034                 if(withref != 0)
1035                         mount_drop(mp, 1);
1036                 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1037                 /*
1038                  * The prior unmount attempt has probably succeeded.
1039                  * Do not dereference mp here - returning EBUSY is safest.
1040                  */
1041                 return (EBUSY);
1042         }
1043         mp->mnt_kern_flag |= MNTK_UNMOUNT;
1044         mp->mnt_lflag |= MNT_LUNMOUNT;
1045         mp->mnt_flag &=~ MNT_ASYNC;
1046         /*
1047          * anyone currently in the fast path that
1048          * trips over the cached rootvp will be
1049          * dumped out and forced into the slow path
1050          * to regenerate a new cached value
1051          */
1052         mp->mnt_realrootvp = NULLVP;
1053         mount_unlock(mp);
1054
1055         /*
1056          * taking the name_cache_lock exclusively will
1057          * insure that everyone is out of the fast path who
1058          * might be trying to use a now stale copy of
1059          * vp->v_mountedhere->mnt_realrootvp
1060          * bumping mount_generation causes the cached values
1061          * to be invalidated
1062          */
1063         name_cache_lock();
1064         mount_generation++;
1065         name_cache_unlock();
1066
1067
1068         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1069         if (withref != 0)
1070                 mount_drop(mp, 0);
1071 #if CONFIG_FSE
1072         fsevent_unmount(mp);  /* has to come first! */
1073 #endif
1074         error = 0;
1075         if (forcedunmount == 0) {
1076                 ubc_umount(mp); /* release cached vnodes */
1077                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1078                         error = VFS_SYNC(mp, MNT_WAIT, ctx);
1079                         if (error) {
1080                                 mount_lock(mp);
1081                                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1082                                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1083                                 mp->mnt_lflag &= ~MNT_LFORCE;
1084                                 goto out;
1085                         }
1086                 }
1087         }
1088
1089         if (forcedunmount)
1090                 lflags |= FORCECLOSE;
1091         error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
1092         if ((forcedunmount == 0) && error) {
1093                 mount_lock(mp);
1094                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1095                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1096                 mp->mnt_lflag &= ~MNT_LFORCE;
1097                 goto out;
1098         }
1099
1100         /* make sure there are no one in the mount iterations or lookup */
1101         mount_iterdrain(mp);
1102
1103         error = VFS_UNMOUNT(mp, flags, ctx);
1104         if (error) {
1105                 mount_iterreset(mp);
1106                 mount_lock(mp);
1107                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1108                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1109                 mp->mnt_lflag &= ~MNT_LFORCE;
1110                 goto out;
1111         }
1112
1113         /* increment the operations count */
1114         if (!error)
1115                 OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
1116
1117         if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1118                 mp->mnt_devvp->v_specflags &= ~SI_MOUNTEDON;
1119                 VNOP_CLOSE(mp->mnt_devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1120                        ctx);
1121                 vnode_rele(mp->mnt_devvp);
1122         }
1123         lck_rw_done(&mp->mnt_rwlock);
1124         mount_list_remove(mp);
1125         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1126
1127         /* mark the mount point hook in the vp but not drop the ref yet */
1128         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1129                         vnode_getwithref(coveredvp);
1130                         vnode_lock_spin(coveredvp);
1131                         coveredvp->v_mountedhere = (struct mount *)0;
1132                         vnode_unlock(coveredvp);
1133                         vnode_put(coveredvp);
1134         }
1135
1136         mount_list_lock();
1137         mp->mnt_vtable->vfc_refcount--;
1138         mount_list_unlock();
1139
1140         cache_purgevfs(mp);     /* remove cache entries for this file sys */
1141         vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1142         mount_lock(mp);
1143         mp->mnt_lflag |= MNT_LDEAD;
1144
1145         if (mp->mnt_lflag & MNT_LWAIT) {
1146                 /*
1147                  * do the wakeup here
1148                  * in case we block in mount_refdrain
1149                  * which will drop the mount lock
1150                  * and allow anyone blocked in vfs_busy
1151                  * to wakeup and see the LDEAD state
1152                  */
1153                 mp->mnt_lflag &= ~MNT_LWAIT;
1154                 wakeup((caddr_t)mp);
1155         }
1156         mount_refdrain(mp);
1157 out:
1158         if (mp->mnt_lflag & MNT_LWAIT) {
1159                 mp->mnt_lflag &= ~MNT_LWAIT;
1160                 needwakeup = 1;
1161         }
1162         mount_unlock(mp);
1163         lck_rw_done(&mp->mnt_rwlock);
1164
1165         if (needwakeup)
1166                 wakeup((caddr_t)mp);
1167         if (!error) {
1168                 if ((coveredvp != NULLVP)) {
1169                         vnode_getwithref(coveredvp);
1170                         vnode_rele(coveredvp);
1171                         vnode_lock_spin(coveredvp);
1172                         if(mp->mnt_crossref == 0) {
1173                                 vnode_unlock(coveredvp);
1174                                 mount_lock_destroy(mp);
1175 #if CONFIG_MACF
1176                                 mac_mount_label_destroy(mp);
1177 #endif
1178                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1179                         }  else {
1180                                 coveredvp->v_lflag |= VL_MOUNTDEAD;
1181                                 vnode_unlock(coveredvp);
1182                         }
1183                         vnode_put(coveredvp);
1184                 } else if (mp->mnt_flag & MNT_ROOTFS) {
1185                                 mount_lock_destroy(mp);
1186 #if CONFIG_MACF
1187                                 mac_mount_label_destroy(mp);
1188 #endif
1189                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1190                 } else
1191                         panic("dounmount: no coveredvp");
1192         }
1193         return (error);
1194 }
1195
1196 void
1197 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1198 {
1199                 vnode_lock(dp);
1200                 mp->mnt_crossref--;
1201                 if (mp->mnt_crossref < 0)
1202                         panic("mount cross refs -ve");
1203                 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1204                         dp->v_lflag &= ~VL_MOUNTDEAD;
1205                         if (need_put)
1206                                 vnode_put_locked(dp);
1207                         vnode_unlock(dp);
1208                         mount_lock_destroy(mp);
1209 #if CONFIG_MACF
1210                         mac_mount_label_destroy(mp);
1211 #endif
1212                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1213                         return;
1214                 }
1215                 if (need_put)
1216                         vnode_put_locked(dp);
1217                 vnode_unlock(dp);
1218 }
1219
1220
1221 /*
1222  * Sync each mounted filesystem.
1223  */
1224 #if DIAGNOSTIC
1225 int syncprt = 0;
1226 struct ctldebug debug0 = { "syncprt", &syncprt };
1227 #endif
1228
1229 int print_vmpage_stat=0;
1230
1231 static int
1232 sync_callback(mount_t mp, __unused void * arg)
1233 {
1234         int asyncflag;
1235
1236         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1237                         asyncflag = mp->mnt_flag & MNT_ASYNC;
1238                         mp->mnt_flag &= ~MNT_ASYNC;
1239                         VFS_SYNC(mp, MNT_NOWAIT, vfs_context_current());
1240                         if (asyncflag)
1241                                 mp->mnt_flag |= MNT_ASYNC;
1242         }
1243         return(VFS_RETURNED);
1244 }
1245
1246
1247 extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean;
1248 extern unsigned int dp_pgins, dp_pgouts;
1249
1250 /* ARGSUSED */
1251 int
1252 sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *retval)
1253 {
1254
1255         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1256         {
1257         if(print_vmpage_stat) {
1258                 vm_countdirtypages();
1259                 printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein,
1260                         dp_pgins, dp_pgouts);
1261         }
1262         }
1263 #if DIAGNOSTIC
1264         if (syncprt)
1265                 vfs_bufstats();
1266 #endif /* DIAGNOSTIC */
1267         return (0);
1268 }
1269
1270 /*
1271  * Change filesystem quotas.
1272  */
1273 #if QUOTA
1274 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, register_t *retval);
1275
1276 int
1277 quotactl(proc_t p, struct quotactl_args *uap, register_t *retval)
1278 {
1279         boolean_t funnel_state;
1280         int error;
1281
1282         funnel_state = thread_funnel_set(kernel_flock, TRUE);
1283         error = quotactl_funneled(p, uap, retval);
1284         thread_funnel_set(kernel_flock, funnel_state);
1285         return(error);
1286 }
1287
1288 static int
1289 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retval)
1290 {
1291         struct mount *mp;
1292         int error, quota_cmd, quota_status;
1293         caddr_t datap;
1294         size_t fnamelen;
1295         struct nameidata nd;
1296         vfs_context_t ctx = vfs_context_current();
1297         struct dqblk my_dqblk;
1298
1299         AUDIT_ARG(uid, uap->uid, 0, 0, 0);
1300         AUDIT_ARG(cmd, uap->cmd);
1301         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1302                 UIO_USERSPACE, uap->path, ctx);
1303         error = namei(&nd);
1304         if (error)
1305                 return (error);
1306         mp = nd.ni_vp->v_mount;
1307         vnode_put(nd.ni_vp);
1308         nameidone(&nd);
1309
1310         /* copyin any data we will need for downstream code */
1311         quota_cmd = uap->cmd >> SUBCMDSHIFT;
1312
1313         switch (quota_cmd) {
1314         case Q_QUOTAON:
1315                 /* uap->arg specifies a file from which to take the quotas */
1316                 fnamelen = MAXPATHLEN;
1317                 datap = kalloc(MAXPATHLEN);
1318                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1319                 break;
1320         case Q_GETQUOTA:
1321                 /* uap->arg is a pointer to a dqblk structure. */
1322                 datap = (caddr_t) &my_dqblk;
1323                 break;
1324         case Q_SETQUOTA:
1325         case Q_SETUSE:
1326                 /* uap->arg is a pointer to a dqblk structure. */
1327                 datap = (caddr_t) &my_dqblk;
1328                 if (proc_is64bit(p)) {
1329                         struct user_dqblk       my_dqblk64;
1330                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1331                         if (error == 0) {
1332                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1333                         }
1334                 }
1335                 else {
1336                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1337                 }
1338                 break;
1339         case Q_QUOTASTAT:
1340                 /* uap->arg is a pointer to an integer */
1341                 datap = (caddr_t) &quota_status;
1342                 break;
1343         default:
1344                 datap = NULL;
1345                 break;
1346         } /* switch */
1347
1348         if (error == 0) {
1349                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1350         }
1351
1352         switch (quota_cmd) {
1353         case Q_QUOTAON:
1354                 if (datap != NULL)
1355                         kfree(datap, MAXPATHLEN);
1356                 break;
1357         case Q_GETQUOTA:
1358                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1359                 if (error == 0) {
1360                         if (proc_is64bit(p)) {
1361                                 struct user_dqblk       my_dqblk64;
1362                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1363                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1364                         }
1365                         else {
1366                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1367                         }
1368                 }
1369                 break;
1370         case Q_QUOTASTAT:
1371                 /* uap->arg is a pointer to an integer */
1372                 if (error == 0) {
1373                         error = copyout(datap, uap->arg, sizeof(quota_status));
1374                 }
1375                 break;
1376         default:
1377                 break;
1378         } /* switch */
1379
1380         return (error);
1381 }
1382 #else
1383 int
1384 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused register_t *retval)
1385 {
1386         return (EOPNOTSUPP);
1387 }
1388 #endif /* QUOTA */
1389
1390 /*
1391  * Get filesystem statistics.
1392  *
1393  * Returns:     0                       Success
1394  *      namei:???
1395  *      vfs_update_vfsstat:???
1396  *      munge_statfs:EFAULT
1397  */
1398 /* ARGSUSED */
1399 int
1400 statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval)
1401 {
1402         struct mount *mp;
1403         struct vfsstatfs *sp;
1404         int error;
1405         struct nameidata nd;
1406         vfs_context_t ctx = vfs_context_current();
1407         vnode_t vp;
1408
1409         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1410                 UIO_USERSPACE, uap->path, ctx);
1411         error = namei(&nd);
1412         if (error)
1413                 return (error);
1414         vp = nd.ni_vp;
1415         mp = vp->v_mount;
1416         sp = &mp->mnt_vfsstat;
1417         nameidone(&nd);
1418
1419         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1420         vnode_put(vp);
1421         if (error != 0)
1422                 return (error);
1423
1424         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1425         return (error);
1426 }
1427
1428 /*
1429  * Get filesystem statistics.
1430  */
1431 /* ARGSUSED */
1432 int
1433 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused register_t *retval)
1434 {
1435         vnode_t vp;
1436         struct mount *mp;
1437         struct vfsstatfs *sp;
1438         int error;
1439
1440         AUDIT_ARG(fd, uap->fd);
1441
1442         if ( (error = file_vnode(uap->fd, &vp)) )
1443                 return (error);
1444
1445         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1446
1447         mp = vp->v_mount;
1448         if (!mp) {
1449                 file_drop(uap->fd);
1450                 return (EBADF);
1451         }
1452         sp = &mp->mnt_vfsstat;
1453         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1454                 file_drop(uap->fd);
1455                 return (error);
1456         }
1457         file_drop(uap->fd);
1458
1459         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1460
1461         return (error);
1462 }
1463
1464 /*
1465  * Common routine to handle copying of statfs64 data to user space
1466  */
1467 static int
1468 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1469 {
1470         int error;
1471         struct statfs64 sfs;
1472
1473         bzero(&sfs, sizeof(sfs));
1474
1475         sfs.f_bsize = sfsp->f_bsize;
1476         sfs.f_iosize = (int32_t)sfsp->f_iosize;
1477         sfs.f_blocks = sfsp->f_blocks;
1478         sfs.f_bfree = sfsp->f_bfree;
1479         sfs.f_bavail = sfsp->f_bavail;
1480         sfs.f_files = sfsp->f_files;
1481         sfs.f_ffree = sfsp->f_ffree;
1482         sfs.f_fsid = sfsp->f_fsid;
1483         sfs.f_owner = sfsp->f_owner;
1484         sfs.f_type = mp->mnt_vtable->vfc_typenum;
1485         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1486         sfs.f_fssubtype = sfsp->f_fssubtype;
1487         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1488         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1489         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1490
1491         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1492
1493         return(error);
1494 }
1495
1496 /*
1497  * Get file system statistics in 64-bit mode
1498  */
1499 int
1500 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t *retval)
1501 {
1502         struct mount *mp;
1503         struct vfsstatfs *sp;
1504         int error;
1505         struct nameidata nd;
1506         vfs_context_t ctxp = vfs_context_current();
1507         vnode_t vp;
1508
1509         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1510                 UIO_USERSPACE, uap->path, ctxp);
1511         error = namei(&nd);
1512         if (error)
1513                 return (error);
1514         vp = nd.ni_vp;
1515         mp = vp->v_mount;
1516         sp = &mp->mnt_vfsstat;
1517         nameidone(&nd);
1518
1519         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
1520         vnode_put(vp);
1521         if (error != 0)
1522                 return (error);
1523
1524         error = statfs64_common(mp, sp, uap->buf);
1525
1526         return (error);
1527 }
1528
1529 /*
1530  * Get file system statistics in 64-bit mode
1531  */
1532 int
1533 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused register_t *retval)
1534 {
1535         struct vnode *vp;
1536         struct mount *mp;
1537         struct vfsstatfs *sp;
1538         int error;
1539
1540         AUDIT_ARG(fd, uap->fd);
1541
1542         if ( (error = file_vnode(uap->fd, &vp)) )
1543                 return (error);
1544
1545         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1546
1547         mp = vp->v_mount;
1548         if (!mp) {
1549                 file_drop(uap->fd);
1550                 return (EBADF);
1551         }
1552         sp = &mp->mnt_vfsstat;
1553         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
1554                 file_drop(uap->fd);
1555                 return (error);
1556         }
1557         file_drop(uap->fd);
1558
1559         error = statfs64_common(mp, sp, uap->buf);
1560
1561         return (error);
1562 }
1563
1564 struct getfsstat_struct {
1565         user_addr_t     sfsp;
1566         user_addr_t     *mp;
1567         int             count;
1568         int             maxcount;
1569         int             flags;
1570         int             error;
1571 };
1572
1573
1574 static int
1575 getfsstat_callback(mount_t mp, void * arg)
1576 {
1577
1578         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1579         struct vfsstatfs *sp;
1580         int error, my_size;
1581         vfs_context_t ctx = vfs_context_current();
1582
1583         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1584                 sp = &mp->mnt_vfsstat;
1585                 /*
1586                  * If MNT_NOWAIT is specified, do not refresh the
1587                  * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1588                  */
1589                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1590                         (error = vfs_update_vfsstat(mp, ctx,
1591                             VFS_USER_EVENT))) {
1592                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1593                         return(VFS_RETURNED);
1594                 }
1595
1596                 /*
1597                  * Need to handle LP64 version of struct statfs
1598                  */
1599                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
1600                 if (error) {
1601                         fstp->error = error;
1602                         return(VFS_RETURNED_DONE);
1603                 }
1604                 fstp->sfsp += my_size;
1605
1606                 if (fstp->mp) {
1607                         error = mac_mount_label_get(mp, *fstp->mp);
1608                         if (error) {
1609                                 fstp->error = error;
1610                                 return(VFS_RETURNED_DONE);
1611                         }
1612                         fstp->mp++;
1613                 }
1614         }
1615         fstp->count++;
1616         return(VFS_RETURNED);
1617 }
1618
1619 /*
1620  * Get statistics on all filesystems.
1621  */
1622 int
1623 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
1624 {
1625         struct __mac_getfsstat_args muap;
1626
1627         muap.buf = uap->buf;
1628         muap.bufsize = uap->bufsize;
1629         muap.mac = USER_ADDR_NULL;
1630         muap.macsize = 0;
1631         muap.flags = uap->flags;
1632
1633         return (__mac_getfsstat(p, &muap, retval));
1634 }
1635
1636 int
1637 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
1638 {
1639         user_addr_t sfsp;
1640         user_addr_t *mp;
1641         int count, maxcount;
1642         struct getfsstat_struct fst;
1643
1644         if (IS_64BIT_PROCESS(p)) {
1645                 maxcount = uap->bufsize / sizeof(struct user_statfs);
1646         }
1647         else {
1648                 maxcount = uap->bufsize / sizeof(struct statfs);
1649         }
1650         sfsp = uap->buf;
1651         count = 0;
1652
1653         mp = NULL;
1654
1655 #if CONFIG_MACF
1656         if (uap->mac != USER_ADDR_NULL) {
1657                 u_int32_t *mp0;
1658                 int error;
1659                 int i;
1660
1661                 count = (int)(uap->macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
1662                 if (count != maxcount)
1663                         return (EINVAL);
1664
1665                 /* Copy in the array */
1666                 MALLOC(mp0, u_int32_t *, uap->macsize, M_MACTEMP, M_WAITOK);
1667                 error = copyin(uap->mac, mp0, uap->macsize);
1668                 if (error)
1669                         return (error);
1670
1671                 /* Normalize to an array of user_addr_t */
1672                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
1673                 for (i = 0; i < count; i++) {
1674                         if (IS_64BIT_PROCESS(p))
1675                                 mp[i] = ((user_addr_t *)mp0)[i];
1676                         else
1677                                 mp[i] = (user_addr_t)mp0[i];
1678                 }
1679                 FREE(mp0, M_MACTEMP);
1680         }
1681 #endif
1682
1683
1684         fst.sfsp = sfsp;
1685         fst.mp = mp;
1686         fst.flags = uap->flags;
1687         fst.count = 0;
1688         fst.error = 0;
1689         fst.maxcount = maxcount;
1690
1691
1692         vfs_iterate(0, getfsstat_callback, &fst);
1693
1694         if (mp)
1695                 FREE(mp, M_MACTEMP);
1696
1697         if (fst.error ) {
1698                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1699                 return(fst.error);
1700         }
1701
1702         if (fst.sfsp && fst.count > fst.maxcount)
1703                 *retval = fst.maxcount;
1704         else
1705                 *retval = fst.count;
1706         return (0);
1707 }
1708
1709 static int
1710 getfsstat64_callback(mount_t mp, void * arg)
1711 {
1712         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1713         struct vfsstatfs *sp;
1714         int error;
1715
1716         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1717                 sp = &mp->mnt_vfsstat;
1718                 /*
1719                  * If MNT_NOWAIT is specified, do not refresh the
1720                  * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1721                  */
1722                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1723                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
1724                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1725                         return(VFS_RETURNED);
1726                 }
1727
1728                 error = statfs64_common(mp, sp, fstp->sfsp);
1729                 if (error) {
1730                         fstp->error = error;
1731                         return(VFS_RETURNED_DONE);
1732                 }
1733                 fstp->sfsp += sizeof(struct statfs64);
1734         }
1735         fstp->count++;
1736         return(VFS_RETURNED);
1737 }
1738
1739 /*
1740  * Get statistics on all file systems in 64 bit mode.
1741  */
1742 int
1743 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
1744 {
1745         user_addr_t sfsp;
1746         int count, maxcount;
1747         struct getfsstat_struct fst;
1748
1749         maxcount = uap->bufsize / sizeof(struct statfs64);
1750
1751         sfsp = uap->buf;
1752         count = 0;
1753
1754         fst.sfsp = sfsp;
1755         fst.flags = uap->flags;
1756         fst.count = 0;
1757         fst.error = 0;
1758         fst.maxcount = maxcount;
1759
1760         vfs_iterate(0, getfsstat64_callback, &fst);
1761
1762         if (fst.error ) {
1763                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1764                 return(fst.error);
1765         }
1766
1767         if (fst.sfsp && fst.count > fst.maxcount)
1768                 *retval = fst.maxcount;
1769         else
1770                 *retval = fst.count;
1771
1772         return (0);
1773 }
1774
#if COMPAT_GETFSSTAT
/*
 * Compatibility entry point for the old getfsstat; always fails with
 * ENOTSUP.
 */
int
ogetfsstat(__unused proc_t p, __unused struct getfsstat_args *uap, __unused register_t *retval)
{
        return (ENOTSUP);
}
#endif
1781
1782 /*
1783  * Change current working directory to a given file descriptor.
1784  */
1785 /* ARGSUSED */
1786 static int
1787 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1788 {
1789         struct filedesc *fdp = p->p_fd;
1790         vnode_t vp;
1791         vnode_t tdp;
1792         vnode_t tvp;
1793         struct mount *mp;
1794         int error;
1795         vfs_context_t ctx = vfs_context_current();
1796
1797         if (per_thread && uap->fd == -1) {
1798                 /*
1799                  * Switching back from per-thread to per process CWD; verify we
1800                  * in fact have one before proceeding.  The only success case
1801                  * for this code path is to return 0 preemptively after zapping
1802                  * the thread structure contents.
1803                  */
1804                 thread_t th = vfs_context_thread(ctx);
1805                 if (th) {
1806                         uthread_t uth = get_bsdthread_info(th);
1807                         tvp = uth->uu_cdir;
1808                         uth->uu_cdir = NULLVP;
1809                         if (tvp != NULLVP) {
1810                                 vnode_rele(tvp);
1811                                 return (0);
1812                         }
1813                 }
1814                 return (EBADF);
1815         }
1816
1817         if ( (error = file_vnode(uap->fd, &vp)) )
1818                 return(error);
1819         if ( (error = vnode_getwithref(vp)) ) {
1820                 file_drop(uap->fd);
1821                 return(error);
1822         }
1823
1824         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1825
1826         if (vp->v_type != VDIR) {
1827                 error = ENOTDIR;
1828                 goto out;
1829         }
1830
1831 #if CONFIG_MACF
1832         error = mac_vnode_check_chdir(ctx, vp);
1833         if (error)
1834                 goto out;
1835 #endif
1836         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
1837         if (error)
1838                 goto out;
1839
1840         while (!error && (mp = vp->v_mountedhere) != NULL) {
1841                 if (vfs_busy(mp, LK_NOWAIT)) {
1842                         error = EACCES;
1843                         goto out;
1844                 }
1845                 error = VFS_ROOT(mp, &tdp, ctx);
1846                 vfs_unbusy(mp);
1847                 if (error)
1848                         break;
1849                 vnode_put(vp);
1850                 vp = tdp;
1851         }
1852         if (error)
1853                 goto out;
1854         if ( (error = vnode_ref(vp)) )
1855                 goto out;
1856         vnode_put(vp);
1857
1858         if (per_thread) {
1859                 thread_t th = vfs_context_thread(ctx);
1860                 if (th) {
1861                         uthread_t uth = get_bsdthread_info(th);
1862                         tvp = uth->uu_cdir;
1863                         uth->uu_cdir = vp;
1864                         OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1865                 } else {
1866                         vnode_rele(vp);
1867                         return (ENOENT);
1868                 }
1869         } else {
1870                 proc_fdlock(p);
1871                 tvp = fdp->fd_cdir;
1872                 fdp->fd_cdir = vp;
1873                 proc_fdunlock(p);
1874         }
1875
1876         if (tvp)
1877                 vnode_rele(tvp);
1878         file_drop(uap->fd);
1879
1880         return (0);
1881 out:
1882         vnode_put(vp);
1883         file_drop(uap->fd);
1884
1885         return(error);
1886 }
1887
1888 int
1889 fchdir(proc_t p, struct fchdir_args *uap, __unused register_t *retval)
1890 {
1891         return common_fchdir(p, uap, 0);
1892 }
1893
1894 int
1895 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused register_t *retval)
1896 {
1897         return common_fchdir(p, (void *)uap, 1);
1898 }
1899
1900 /*
1901  * Change current working directory (``.'').
1902  *
1903  * Returns:     0                       Success
1904  *      change_dir:ENOTDIR
1905  *      change_dir:???
1906  *      vnode_ref:ENOENT                No such file or directory
1907  */
1908 /* ARGSUSED */
1909 static int
1910 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1911 {
1912         struct filedesc *fdp = p->p_fd;
1913         int error;
1914         struct nameidata nd;
1915         vnode_t tvp;
1916         vfs_context_t ctx = vfs_context_current();
1917
1918         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1919                 UIO_USERSPACE, uap->path, ctx);
1920         error = change_dir(&nd, ctx);
1921         if (error)
1922                 return (error);
1923         if ( (error = vnode_ref(nd.ni_vp)) ) {
1924                 vnode_put(nd.ni_vp);
1925                 return (error);
1926         }
1927         /*
1928          * drop the iocount we picked up in change_dir
1929          */
1930         vnode_put(nd.ni_vp);
1931
1932         if (per_thread) {
1933                 thread_t th = vfs_context_thread(ctx);
1934                 if (th) {
1935                         uthread_t uth = get_bsdthread_info(th);
1936                         tvp = uth->uu_cdir;
1937                         uth->uu_cdir = nd.ni_vp;
1938                         OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1939                 } else {
1940                         vnode_rele(nd.ni_vp);
1941                         return (ENOENT);
1942                 }
1943         } else {
1944                 proc_fdlock(p);
1945                 tvp = fdp->fd_cdir;
1946                 fdp->fd_cdir = nd.ni_vp;
1947                 proc_fdunlock(p);
1948         }
1949
1950         if (tvp)
1951                 vnode_rele(tvp);
1952
1953         return (0);
1954 }
1955
1956 int
1957 chdir(proc_t p, struct chdir_args *uap, __unused register_t *retval)
1958 {
1959         return common_chdir(p, (void *)uap, 0);
1960 }
1961
1962 int
1963 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t *retval)
1964 {
1965         return common_chdir(p, (void *)uap, 1);
1966 }
1967
1968
1969 /*
1970  * Change notion of root (``/'') directory.
1971  */
1972 /* ARGSUSED */
1973 int
1974 chroot(proc_t p, struct chroot_args *uap, __unused register_t *retval)
1975 {
1976         struct filedesc *fdp = p->p_fd;
1977         int error;
1978         struct nameidata nd;
1979         vnode_t tvp;
1980         vfs_context_t ctx = vfs_context_current();
1981
1982         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1983                 return (error);
1984
1985         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1986                 UIO_USERSPACE, uap->path, ctx);
1987         error = change_dir(&nd, ctx);
1988         if (error)
1989                 return (error);
1990
1991 #if CONFIG_MACF
1992         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
1993             &nd.ni_cnd);
1994         if (error) {
1995                 vnode_put(nd.ni_vp);
1996                 return (error);
1997         }
1998 #endif
1999
2000         if ( (error = vnode_ref(nd.ni_vp)) ) {
2001                 vnode_put(nd.ni_vp);
2002                 return (error);
2003         }
2004         vnode_put(nd.ni_vp);
2005
2006         proc_fdlock(p);
2007         tvp = fdp->fd_rdir;
2008         fdp->fd_rdir = nd.ni_vp;
2009         fdp->fd_flags |= FD_CHROOT;
2010         proc_fdunlock(p);
2011
2012         if (tvp != NULL)
2013                 vnode_rele(tvp);
2014
2015         return (0);
2016 }
2017
2018 /*
2019  * Common routine for chroot and chdir.
2020  *
2021  * Returns:     0                       Success
2022  *              ENOTDIR                 Not a directory
2023  *              namei:???               [anything namei can return]
2024  *              vnode_authorize:???     [anything vnode_authorize can return]
2025  */
2026 static int
2027 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2028 {
2029         vnode_t vp;
2030         int error;
2031
2032         if ((error = namei(ndp)))
2033                 return (error);
2034         nameidone(ndp);
2035         vp = ndp->ni_vp;
2036
2037         if (vp->v_type != VDIR) {
2038                 vnode_put(vp);
2039                 return (ENOTDIR);
2040         }
2041
2042 #if CONFIG_MACF
2043         error = mac_vnode_check_chdir(ctx, vp);
2044         if (error) {
2045                 vnode_put(vp);
2046                 return (error);
2047         }
2048 #endif
2049
2050         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2051         if (error) {
2052                 vnode_put(vp);
2053                 return (error);
2054         }
2055
2056         return (error);
2057 }
2058
/*
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * Parameters:  ctx                     Context of the caller; supplies the
 *                                      process, thread and credential
 *              ndp                     Initialized nameidata describing the
 *                                      path to open
 *              uflags                  open(2)-style flags from the caller
 *              vap                     Attributes to apply if the file is
 *                                      created (va_mode is audited here)
 *              retval                  Receives the new descriptor index
 *
 * Returns:     0                       Success
 *              EINVAL                  All O_ACCMODE bits set in uflags
 *              EINTR                   vn_open_auth returned ERESTART
 *      falloc:ENFILE
 *      falloc:EMFILE
 *      falloc:ENOMEM
 *      vn_open_auth:???
 *      dupfdopen:???
 *      mac_file_check_lock:???
 *      VNOP_ADVLOCK:???
 *      vnode_setsize:???
 */
#warning XXX implement uid, gid
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, register_t *retval)
{
        proc_t p = vfs_context_proc(ctx);
        uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
        struct filedesc *fdp = p->p_fd;
        struct fileproc *fp;
        vnode_t vp;
        int flags, oflags;
        struct fileproc *nfp;
        int type, indx, error;
        struct flock lf;
        int no_controlling_tty = 0;
        int deny_controlling_tty = 0;
        struct session *sessp = SESSION_NULL;
        struct vfs_context context = *vfs_context_current();    /* local copy */

        oflags = uflags;

        /* Reject a request with every access-mode bit set. */
        if ((oflags & O_ACCMODE) == O_ACCMODE)
                return(EINVAL);
        /* Convert the user-level open flags to kernel file flags. */
        flags = FFLAGS(uflags);

        AUDIT_ARG(fflags, oflags);
        AUDIT_ARG(mode, vap->va_mode);

        /* Reserve a descriptor slot and file structure up front. */
        if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
                return (error);
        }
        fp = nfp;
        /*
         * Negative marker derived from the new index; it is tested for
         * ">= 0" below if the open fails with ENODEV/ENXIO.
         * NOTE(review): presumably the fdopen device routine replaces it
         * with the descriptor to duplicate -- confirm.
         */
        uu->uu_dupfd = -indx - 1;

        if (!(p->p_flag & P_CONTROLT)) {
                /* Session reference; dropped by session_rele() on every exit. */
                sessp = proc_session(p);
                no_controlling_tty = 1;
                /*
                 * If conditions would warrant getting a controlling tty if
                 * the device being opened is a tty (see ttyopen in tty.c),
                 * but the open flags deny it, set a flag in the session to
                 * prevent it.
                 */
                if (SESS_LEADER(p, sessp) &&
                    sessp->s_ttyvp == NULL &&
                    (flags & O_NOCTTY)) {
                        session_lock(sessp);
                        sessp->s_flags |= S_NOCTTY;
                        session_unlock(sessp);
                        deny_controlling_tty = 1;
                }
        }

        if ((error = vn_open_auth(ndp, &flags, vap))) {
                /*
                 * Open failed.  If uu_dupfd was turned non-negative, try to
                 * satisfy the request by duplicating that descriptor into
                 * the slot reserved above.
                 */
                if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
                        if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
                                fp_drop(p, indx, NULL, 0);
                                *retval = indx;
                                /* Undo the temporary S_NOCTTY marking. */
                                if (deny_controlling_tty) {
                                        session_lock(sessp);
                                        sessp->s_flags &= ~S_NOCTTY;
                                        session_unlock(sessp);
                                }
                                if (sessp != SESSION_NULL)
                                        session_rele(sessp);
                                return (0);
                        }
                }
                /* ERESTART is reported to the caller as EINTR. */
                if (error == ERESTART)
                        error = EINTR;
                /* Give back the descriptor slot reserved by falloc(). */
                fp_free(p, indx, fp);

                if (deny_controlling_tty) {
                        session_lock(sessp);
                        sessp->s_flags &= ~S_NOCTTY;
                        session_unlock(sessp);
                }
                if (sessp != SESSION_NULL)
                        session_rele(sessp);
                return (error);
        }
        uu->uu_dupfd = 0;
        vp = ndp->ni_vp;

        /* Bind the opened vnode to the new file structure. */
        fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
        fp->f_fglob->fg_type = DTYPE_VNODE;
        fp->f_fglob->fg_ops = &vnops;
        fp->f_fglob->fg_data = (caddr_t)vp;

        if (flags & (O_EXLOCK | O_SHLOCK)) {
                /*
                 * O_EXLOCK / O_SHLOCK: take a write or read lock with
                 * l_start and l_len both zero, waiting for it unless
                 * FNONBLOCK is set.
                 */
                lf.l_whence = SEEK_SET;
                lf.l_start = 0;
                lf.l_len = 0;
                if (flags & O_EXLOCK)
                        lf.l_type = F_WRLCK;
                else
                        lf.l_type = F_RDLCK;
                type = F_FLOCK;
                if ((flags & FNONBLOCK) == 0)
                        type |= F_WAIT;
#if CONFIG_MACF
                error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
                    F_SETLK, &lf);
                if (error)
                        goto bad;
#endif
                if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
                        goto bad;
                fp->f_fglob->fg_flag |= FHASLOCK;
        }

        /* try to truncate by setting the size attribute */
        if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
                goto bad;

        /*
         * If the open flags denied the acquisition of a controlling tty,
         * clear the flag in the session structure that prevented the lower
         * level code from assigning one.
         */
        if (deny_controlling_tty) {
                session_lock(sessp);
                sessp->s_flags &= ~S_NOCTTY;
                session_unlock(sessp);
        }

        /*
         * If a controlling tty was set by the tty line discipline, then we
         * want to set the vp of the tty into the session structure.  We have
         * a race here because we can't get to the vp for the tp in ttyopen,
         * because it's not passed as a parameter in the open path.
         */
        if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
                vnode_t ttyvp;
                /* NOTE(review): the result of vnode_ref() is not checked here. */
                vnode_ref(vp);
                session_lock(sessp);
                ttyvp = sessp->s_ttyvp;
                sessp->s_ttyvp = vp;
                sessp->s_ttyvid = vnode_vid(vp);
                session_unlock(sessp);
                /* Release the vnode previously recorded for the session. */
                if (ttyvp != NULLVP)
                        vnode_rele(ttyvp);
        }

        vnode_put(vp);

        /* Release the reserved fd slot and our hold on the fileproc. */
        proc_fdlock(p);
        procfdtbl_releasefd(p, indx, NULL);
        fp_drop(p, indx, fp, 1);
        proc_fdunlock(p);

        *retval = indx;

        if (sessp != SESSION_NULL)
                session_rele(sessp);
        return (0);
bad:
        /*
         * Failure after the vnode was opened: undo the session marking,
         * close the vnode and give back the descriptor slot.
         */
        if (deny_controlling_tty) {
                session_lock(sessp);
                sessp->s_flags &= ~S_NOCTTY;
                session_unlock(sessp);
        }
        if (sessp != SESSION_NULL)
                session_rele(sessp);

        /* Modify local copy (to not damage thread copy) */
        context.vc_ucred = fp->f_fglob->fg_cred;

        vn_close(vp, fp->f_fglob->fg_flag, &context);
        vnode_put(vp);
        fp_free(p, indx, fp);

        return (error);

}
2248
2249 /*
2250  * An open system call using an extended argument list compared to the regular
2251  * system call 'open'.
2252  *
2253  * Parameters:  p                       Process requesting the open
2254  *              uap                     User argument descriptor (see below)
2255  *              retval                  Pointer to an area to receive the
2256  *                                      return calue from the system call
2257  *
2258  * Indirect:    uap->path               Path to open (same as 'open')
2259  *              uap->flags              Flags to open (same as 'open'
2260  *              uap->uid                UID to set, if creating
2261  *              uap->gid                GID to set, if creating
2262  *              uap->mode               File mode, if creating (same as 'open')
2263  *              uap->xsecurity          ACL to set, if creating
2264  *
2265  * Returns:     0                       Success
2266  *              !0                      errno value
2267  *
2268  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2269  *
2270  * XXX:         We should enummerate the possible errno values here, and where
2271  *              in the code they originated.
2272  */
2273 int
2274 open_extended(proc_t p, struct open_extended_args *uap, register_t *retval)
2275 {
2276         struct filedesc *fdp = p->p_fd;
2277         int ciferror;
2278         kauth_filesec_t xsecdst;
2279         struct vnode_attr va;
2280         struct nameidata nd;
2281         int cmode;
2282
2283         xsecdst = NULL;
2284         if ((uap->xsecurity != USER_ADDR_NULL) &&
2285             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2286                 return ciferror;
2287
2288         VATTR_INIT(&va);
2289         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2290         VATTR_SET(&va, va_mode, cmode);
2291         if (uap->uid != KAUTH_UID_NONE)
2292                 VATTR_SET(&va, va_uid, uap->uid);
2293         if (uap->gid != KAUTH_GID_NONE)
2294                 VATTR_SET(&va, va_gid, uap->gid);
2295         if (xsecdst != NULL)
2296                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2297
2298         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2299
2300         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2301         if (xsecdst != NULL)
2302                 kauth_filesec_free(xsecdst);
2303
2304         return ciferror;
2305 }
2306
2307 int
2308 open(proc_t p, struct open_args *uap, register_t *retval)
2309 {
2310         __pthread_testcancel(1);
2311         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2312 }
2313
2314
2315 int
2316 open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval)
2317 {
2318         struct filedesc *fdp = p->p_fd;
2319         struct vnode_attr va;
2320         struct nameidata nd;
2321         int cmode;
2322
2323         VATTR_INIT(&va);
2324         /* Mask off all but regular access permissions */
2325         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2326         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2327
2328         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2329
2330         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2331 }
2332
2333
2334 /*
2335  * Create a special file.
2336  */
2337 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2338
2339 int
2340 mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval)
2341 {
2342         struct vnode_attr va;
2343         vfs_context_t ctx = vfs_context_current();
2344         int error;
2345         int whiteout = 0;
2346         struct nameidata nd;
2347         vnode_t vp, dvp;
2348
2349         VATTR_INIT(&va);
2350         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2351         VATTR_SET(&va, va_rdev, uap->dev);
2352
2353         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2354         if ((uap->mode & S_IFMT) == S_IFIFO)
2355                 return(mkfifo1(ctx, uap->path, &va));
2356
2357         AUDIT_ARG(mode, uap->mode);
2358         AUDIT_ARG(dev, uap->dev);
2359
2360         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2361                 return (error);
2362         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2363                 UIO_USERSPACE, uap->path, ctx);
2364         error = namei(&nd);
2365         if (error)
2366                 return (error);
2367         dvp = nd.ni_dvp;
2368         vp = nd.ni_vp;
2369
2370         if (vp != NULL) {
2371                 error = EEXIST;
2372                 goto out;
2373         }
2374
2375         switch (uap->mode & S_IFMT) {
2376         case S_IFMT:    /* used by badsect to flag bad sectors */
2377                 VATTR_SET(&va, va_type, VBAD);
2378                 break;
2379         case S_IFCHR:
2380                 VATTR_SET(&va, va_type, VCHR);
2381                 break;
2382         case S_IFBLK:
2383                 VATTR_SET(&va, va_type, VBLK);
2384                 break;
2385         case S_IFWHT:
2386                 whiteout = 1;
2387                 break;
2388         default:
2389                 error = EINVAL;
2390                 goto out;
2391         }
2392
2393 #if CONFIG_MACF
2394         if (!whiteout) {
2395                 error = mac_vnode_check_create(ctx,
2396                     nd.ni_dvp, &nd.ni_cnd, &va);
2397                 if (error)
2398                         goto out;
2399         }
2400 #endif
2401
2402         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2403                 goto out;
2404
2405         if (whiteout) {
2406                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2407         } else {
2408                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2409         }
2410         if (error)
2411                 goto out;
2412
2413         if (vp) {
2414                 int     update_flags = 0;
2415
2416                 // Make sure the name & parent pointers are hooked up
2417                 if (vp->v_name == NULL)
2418                         update_flags |= VNODE_UPDATE_NAME;
2419                 if (vp->v_parent == NULLVP)
2420                         update_flags |= VNODE_UPDATE_PARENT;
2421
2422                 if (update_flags)
2423                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2424
2425 #if CONFIG_FSE
2426                 add_fsevent(FSE_CREATE_FILE, ctx,
2427                     FSE_ARG_VNODE, vp,
2428                     FSE_ARG_DONE);
2429 #endif
2430         }
2431
2432 out:
2433         /*
2434          * nameidone has to happen before we vnode_put(dvp)
2435          * since it may need to release the fs_nodelock on the dvp
2436          */
2437         nameidone(&nd);
2438
2439         if (vp)
2440                 vnode_put(vp);
2441         vnode_put(dvp);
2442
2443         return (error);
2444 }
2445
2446 /*
2447  * Create a named pipe.
2448  *
2449  * Returns:     0                       Success
2450  *              EEXIST
2451  *      namei:???
2452  *      vnode_authorize:???
2453  *      vn_create:???
2454  */
2455 static int
2456 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
2457 {
2458         vnode_t vp, dvp;
2459         int error;
2460         struct nameidata nd;
2461
2462         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2463                 UIO_USERSPACE, upath, ctx);
2464         error = namei(&nd);
2465         if (error)
2466                 return (error);
2467         dvp = nd.ni_dvp;
2468         vp = nd.ni_vp;
2469
2470         /* check that this is a new file and authorize addition */
2471         if (vp != NULL) {
2472                 error = EEXIST;
2473                 goto out;
2474         }
2475         VATTR_SET(vap, va_type, VFIFO);
2476
2477 #if CONFIG_MACF
2478         error = mac_vnode_check_create(ctx, nd.ni_dvp,
2479             &nd.ni_cnd, vap);
2480         if (error)
2481                 goto out;
2482 #endif
2483
2484
2485         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2486                 goto out;
2487
2488
2489         error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
2490 out:
2491         /*
2492          * nameidone has to happen before we vnode_put(dvp)
2493          * since it may need to release the fs_nodelock on the dvp
2494          */
2495         nameidone(&nd);
2496
2497         if (vp)
2498                 vnode_put(vp);
2499         vnode_put(dvp);
2500
2501         return error;
2502 }
2503
2504
2505 /*
2506  * A mkfifo system call using an extended argument list compared to the regular
2507  * system call 'mkfifo'.
2508  *
2509  * Parameters:  p                       Process requesting the open
2510  *              uap                     User argument descriptor (see below)
2511  *              retval                  (Ignored)
2512  *
2513  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
2514  *              uap->uid                UID to set
2515  *              uap->gid                GID to set
2516  *              uap->mode               File mode to set (same as 'mkfifo')
2517  *              uap->xsecurity          ACL to set, if creating
2518  *
2519  * Returns:     0                       Success
2520  *              !0                      errno value
2521  *
2522  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2523  *
2524  * XXX:         We should enummerate the possible errno values here, and where
2525  *              in the code they originated.
2526  */
2527 int
2528 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t *retval)
2529 {
2530         int ciferror;
2531         kauth_filesec_t xsecdst;
2532         struct vnode_attr va;
2533
2534         xsecdst = KAUTH_FILESEC_NONE;
2535         if (uap->xsecurity != USER_ADDR_NULL) {
2536                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
2537                         return ciferror;
2538         }
2539
2540         VATTR_INIT(&va);
2541         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2542         if (uap->uid != KAUTH_UID_NONE)
2543                 VATTR_SET(&va, va_uid, uap->uid);
2544         if (uap->gid != KAUTH_GID_NONE)
2545                 VATTR_SET(&va, va_gid, uap->gid);
2546         if (xsecdst != KAUTH_FILESEC_NONE)
2547                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2548
2549         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
2550
2551         if (xsecdst != KAUTH_FILESEC_NONE)
2552                 kauth_filesec_free(xsecdst);
2553         return ciferror;
2554 }
2555
2556 /* ARGSUSED */
2557 int
2558 mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval)
2559 {
2560         struct vnode_attr va;
2561
2562         VATTR_INIT(&va);
2563         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2564
2565         return(mkfifo1(vfs_context_current(), uap->path, &va));
2566 }
2567
2568 /*
2569  * Make a hard file link.
2570  *
2571  * Returns:     0                       Success
2572  *              EPERM
2573  *              EEXIST
2574  *              EXDEV
2575  *      namei:???
2576  *      vnode_authorize:???
2577  *      VNOP_LINK:???
2578  */
2579 /* ARGSUSED */
2580 int
2581 link(__unused proc_t p, struct link_args *uap, __unused register_t *retval)
2582 {
2583         vnode_t vp, dvp, lvp;
2584         struct nameidata nd;
2585         vfs_context_t ctx = vfs_context_current();
2586         int error;
2587         fse_info finfo;
2588         int need_event, has_listeners;
2589         char *target_path = NULL;
2590
2591         vp = dvp = lvp = NULLVP;
2592
2593         /* look up the object we are linking to */
2594         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2595                 UIO_USERSPACE, uap->path, ctx);
2596         error = namei(&nd);
2597         if (error)
2598                 return (error);
2599         vp = nd.ni_vp;
2600
2601         nameidone(&nd);
2602
2603         /*
2604          * Normally, linking to directories is not supported.
2605          * However, some file systems may have limited support.
2606          */
2607         if (vp->v_type == VDIR) {
2608                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2609                         error = EPERM;   /* POSIX */
2610                         goto out;
2611                 }
2612                 /* Linking to a directory requires ownership. */
2613                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
2614                         struct vnode_attr dva;
2615
2616                         VATTR_INIT(&dva);
2617                         VATTR_WANTED(&dva, va_uid);
2618                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
2619                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
2620                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
2621                                 error = EACCES;
2622                                 goto out;
2623                         }
2624                 }
2625         }
2626
2627         /* lookup the target node */
2628         nd.ni_cnd.cn_nameiop = CREATE;
2629         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
2630         nd.ni_dirp = uap->link;
2631         error = namei(&nd);
2632         if (error != 0)
2633                 goto out;
2634         dvp = nd.ni_dvp;
2635         lvp = nd.ni_vp;
2636
2637 #if CONFIG_MACF
2638         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
2639                 goto out2;
2640 #endif
2641
2642         /* or to anything that kauth doesn't want us to (eg. immutable items) */
2643         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
2644                 goto out2;
2645
2646         /* target node must not exist */
2647         if (lvp != NULLVP) {
2648                 error = EEXIST;
2649                 goto out2;
2650         }
2651         /* cannot link across mountpoints */
2652         if (vnode_mount(vp) != vnode_mount(dvp)) {
2653                 error = EXDEV;
2654                 goto out2;
2655         }
2656
2657         /* authorize creation of the target note */
2658         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2659                 goto out2;
2660
2661         /* and finally make the link */
2662         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
2663         if (error)
2664                 goto out2;
2665
2666 #if CONFIG_FSE
2667         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2668 #else
2669         need_event = 0;
2670 #endif
2671         has_listeners = kauth_authorize_fileop_has_listeners();
2672
2673         if (need_event || has_listeners) {
2674                 char *link_to_path = NULL;
2675                 int len, link_name_len;
2676
2677                 /* build the path to the new link file */
2678                 GET_PATH(target_path);
2679                 if (target_path == NULL) {
2680                         error = ENOMEM;
2681                         goto out2;
2682                 }
2683
2684                 len = MAXPATHLEN;
2685                 vn_getpath(dvp, target_path, &len);
2686                 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2687                     target_path[len-1] = '/';
2688                     strlcpy(&target_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2689                     len += nd.ni_cnd.cn_namelen;
2690                 }
2691
2692                 if (has_listeners) {
2693                         /* build the path to file we are linking to */
2694                         GET_PATH(link_to_path);
2695                         if (link_to_path == NULL) {
2696                                 error = ENOMEM;
2697                                 goto out2;
2698                         }
2699
2700                         link_name_len = MAXPATHLEN;
2701                         vn_getpath(vp, link_to_path, &link_name_len);
2702
2703                         /*
2704                          * Call out to allow 3rd party notification of rename.
2705                          * Ignore result of kauth_authorize_fileop call.
2706                          */
2707                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
2708                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
2709                         if (link_to_path != NULL) {
2710                                 RELEASE_PATH(link_to_path);
2711                         }
2712                 }
2713 #if CONFIG_FSE
2714                 if (need_event) {
2715                         /* construct fsevent */
2716                         if (get_fse_info(vp, &finfo, ctx) == 0) {
2717                                 // build the path to the destination of the link
2718                                 add_fsevent(FSE_CREATE_FILE, ctx,
2719                                             FSE_ARG_STRING, len, target_path,
2720                                             FSE_ARG_FINFO, &finfo,
2721                                             FSE_ARG_DONE);
2722                         }
2723                 }
2724 #endif
2725         }
2726 out2:
2727         /*
2728          * nameidone has to happen before we vnode_put(dvp)
2729          * since it may need to release the fs_nodelock on the dvp
2730          */
2731         nameidone(&nd);
2732         if (target_path != NULL) {
2733                 RELEASE_PATH(target_path);
2734         }
2735 out:
2736         if (lvp)
2737                 vnode_put(lvp);
2738         if (dvp)
2739                 vnode_put(dvp);
2740         vnode_put(vp);
2741         return (error);
2742 }
2743
2744 /*
2745  * Make a symbolic link.
2746  *
2747  * We could add support for ACLs here too...
2748  */
2749 /* ARGSUSED */
2750 int
2751 symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval)
2752 {
2753         struct vnode_attr va;
2754         char *path;
2755         int error;
2756         struct nameidata nd;
2757         vfs_context_t ctx = vfs_context_current();
2758         vnode_t vp, dvp;
2759         size_t dummy=0;
2760
2761         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2762         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
2763         if (error)
2764                 goto out;
2765         AUDIT_ARG(text, path);  /* This is the link string */
2766
2767         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2768                 UIO_USERSPACE, uap->link, ctx);
2769         error = namei(&nd);
2770         if (error)
2771                 goto out;
2772         dvp = nd.ni_dvp;
2773         vp = nd.ni_vp;
2774
2775         VATTR_INIT(&va);
2776         VATTR_SET(&va, va_type, VLNK);
2777         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
2778 #if CONFIG_MACF
2779         error = mac_vnode_check_create(ctx,
2780                         dvp, &nd.ni_cnd, &va);
2781 #endif
2782         if (error != 0) {
2783             goto skipit;
2784         }
2785
2786         if (vp != NULL) {
2787             error = EEXIST;
2788             goto skipit;
2789         }
2790
2791         /* authorize */
2792         if (error == 0)
2793                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
2794         /* get default ownership, etc. */
2795         if (error == 0)
2796                 error = vnode_authattr_new(dvp, &va, 0, ctx);
2797         if (error == 0)
2798                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
2799
2800         /* do fallback attribute handling */
2801         if (error == 0)
2802                 error = vnode_setattr_fallback(vp, &va, ctx);
2803
2804         if (error == 0) {
2805                 int     update_flags = 0;
2806
2807                 if (vp == NULL) {
2808                         nd.ni_cnd.cn_nameiop = LOOKUP;
2809                         nd.ni_cnd.cn_flags = 0;
2810                         error = namei(&nd);
2811                         vp = nd.ni_vp;
2812
2813                         if (vp == NULL)
2814                                 goto skipit;
2815                 }
2816
2817 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
2818                 /* call out to allow 3rd party notification of rename.
2819                  * Ignore result of kauth_authorize_fileop call.
2820                  */
2821                 if (kauth_authorize_fileop_has_listeners() &&
2822                     namei(&nd) == 0) {
2823                         char *new_link_path = NULL;
2824                         int             len;
2825
2826                         /* build the path to the new link file */
2827                         new_link_path = get_pathbuff();
2828                         len = MAXPATHLEN;
2829                         vn_getpath(dvp, new_link_path, &len);
2830                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2831                                 new_link_path[len - 1] = '/';
2832                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2833                         }
2834
2835                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2836                                            (uintptr_t)path, (uintptr_t)new_link_path);
2837                         if (new_link_path != NULL)
2838                                 release_pathbuff(new_link_path);
2839                 }
2840 #endif
2841                 // Make sure the name & parent pointers are hooked up
2842                 if (vp->v_name == NULL)
2843                         update_flags |= VNODE_UPDATE_NAME;
2844                 if (vp->v_parent == NULLVP)
2845                         update_flags |= VNODE_UPDATE_PARENT;
2846
2847                 if (update_flags)
2848                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2849
2850 #if CONFIG_FSE
2851                 add_fsevent(FSE_CREATE_FILE, ctx,
2852                             FSE_ARG_VNODE, vp,
2853                             FSE_ARG_DONE);
2854 #endif
2855         }
2856
2857 skipit:
2858         /*
2859          * nameidone has to happen before we vnode_put(dvp)
2860          * since it may need to release the fs_nodelock on the dvp
2861          */
2862         nameidone(&nd);
2863
2864         if (vp)
2865                 vnode_put(vp);
2866         vnode_put(dvp);
2867 out:
2868         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2869
2870         return (error);
2871 }
2872
2873 /*
2874  * Delete a whiteout from the filesystem.
2875  */
2876 /* ARGSUSED */
2877 #warning XXX authorization not implmented for whiteouts
2878 int
2879 undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retval)
2880 {
2881         int error;
2882         struct nameidata nd;
2883         vfs_context_t ctx = vfs_context_current();
2884         vnode_t vp, dvp;
2885
2886         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
2887                 UIO_USERSPACE, uap->path, ctx);
2888         error = namei(&nd);
2889         if (error)
2890                 return (error);
2891         dvp = nd.ni_dvp;
2892         vp = nd.ni_vp;
2893
2894         if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2895                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
2896         } else
2897                 error = EEXIST;
2898
2899         /*
2900          * nameidone has to happen before we vnode_put(dvp)
2901          * since it may need to release the fs_nodelock on the dvp
2902          */
2903         nameidone(&nd);
2904
2905         if (vp)
2906                 vnode_put(vp);
2907         vnode_put(dvp);
2908
2909         return (error);
2910 }
2911
2912 /*
2913  * Delete a name from the filesystem.
2914  */
2915 /* ARGSUSED */
2916 int
2917 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
2918 {
2919         vnode_t vp, dvp;
2920         int error;
2921         struct componentname *cnp;
2922         char  *path = NULL;
2923         int  len;
2924         fse_info  finfo;
2925         int flags = 0;
2926         int need_event = 0;
2927         int has_listeners = 0;
2928
2929         ndp->ni_cnd.cn_flags |= LOCKPARENT;
2930         cnp = &ndp->ni_cnd;
2931
2932         error = namei(ndp);
2933         if (error)
2934                 return (error);
2935         dvp = ndp->ni_dvp;
2936         vp = ndp->ni_vp;
2937
2938         /* With Carbon delete semantics, busy files cannot be deleted */
2939         if (nodelbusy) {
2940                 flags |= VNODE_REMOVE_NODELETEBUSY;
2941         }
2942
2943         /*
2944          * Normally, unlinking of directories is not supported.
2945          * However, some file systems may have limited support.
2946          */
2947         if ((vp->v_type == VDIR) &&
2948             !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2949                 error = EPERM;  /* POSIX */
2950         }
2951
2952         /*
2953          * The root of a mounted filesystem cannot be deleted.
2954          */
2955         if (vp->v_flag & VROOT) {
2956                 error = EBUSY;
2957         }
2958         if (error)
2959                 goto out;
2960
2961
2962         /* authorize the delete operation */
2963 #if CONFIG_MACF
2964         if (!error)
2965                 error = mac_vnode_check_unlink(ctx,
2966                     dvp, vp, cnp);
2967 #endif /* MAC */
2968         if (!error)
2969                 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
2970         if (error)
2971                 goto out;
2972
2973 #if CONFIG_FSE
2974         need_event = need_fsevent(FSE_DELETE, dvp);
2975         if (need_event) {
2976                 if ((vp->v_flag & VISHARDLINK) == 0) {
2977                         get_fse_info(vp, &finfo, ctx);
2978                 }
2979         }
2980 #endif
2981         has_listeners = kauth_authorize_fileop_has_listeners();
2982         if (need_event || has_listeners) {
2983                 GET_PATH(path);
2984                 if (path == NULL) {
2985                         error = ENOMEM;
2986                         goto out;
2987                 }
2988                 len = MAXPATHLEN;
2989                 vn_getpath(vp, path, &len);
2990         }
2991
2992 #if NAMEDRSRCFORK
2993         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2994                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
2995         else
2996 #endif
2997                 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
2998
2999         /*
3000          * Call out to allow 3rd party notification of delete.
3001          * Ignore result of kauth_authorize_fileop call.
3002          */
3003         if (!error) {
3004                 if (has_listeners) {
3005                         kauth_authorize_fileop(vfs_context_ucred(ctx),
3006                                 KAUTH_FILEOP_DELETE,
3007                                 (uintptr_t)vp,
3008                                 (uintptr_t)path);
3009                 }
3010
3011                 if (vp->v_flag & VISHARDLINK) {
3012                     //
3013                     // if a hardlink gets deleted we want to blow away the
3014                     // v_parent link because the path that got us to this
3015                     // instance of the link is no longer valid.  this will
3016                     // force the next call to get the path to ask the file
3017                     // system instead of just following the v_parent link.
3018                     //
3019                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3020                 }
3021
3022 #if CONFIG_FSE
3023                 if (need_event) {
3024                         if (vp->v_flag & VISHARDLINK) {
3025                                 get_fse_info(vp, &finfo, ctx);
3026                         }
3027                         add_fsevent(FSE_DELETE, ctx,
3028                                                 FSE_ARG_STRING, len, path,
3029                                                 FSE_ARG_FINFO, &finfo,
3030                                                 FSE_ARG_DONE);
3031                 }
3032 #endif
3033         }
3034         if (path != NULL)
3035                 RELEASE_PATH(path);
3036
3037         /*
3038          * nameidone has to happen before we vnode_put(dvp)
3039          * since it may need to release the fs_nodelock on the dvp
3040          */
3041 out:
3042         nameidone(ndp);
3043         vnode_put(dvp);
3044         vnode_put(vp);
3045         return (error);
3046 }
3047
3048 /*
3049  * Delete a name from the filesystem using POSIX semantics.
3050  */
3051 int
3052 unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval)
3053 {
3054         struct nameidata nd;
3055         vfs_context_t ctx = vfs_context_current();
3056
3057         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3058         return unlink1(ctx, &nd, 0);
3059 }
3060
3061 /*
3062  * Delete a name from the filesystem using Carbon semantics.
3063  */
3064 int
3065 delete(__unused proc_t p, struct delete_args *uap, __unused register_t *retval)
3066 {
3067         struct nameidata nd;
3068         vfs_context_t ctx = vfs_context_current();
3069
3070         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3071         return unlink1(ctx, &nd, 1);
3072 }
3073
3074 /*
3075  * Reposition read/write file offset.
3076  */
3077 int
3078 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3079 {
3080         struct fileproc *fp;
3081         vnode_t vp;
3082         struct vfs_context *ctx;
3083         off_t offset = uap->offset, file_size;
3084         int error;
3085
3086         if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3087                 if (error == ENOTSUP)
3088                         return (ESPIPE);
3089                 return (error);
3090         }
3091         if (vnode_isfifo(vp)) {
3092                 file_drop(uap->fd);
3093                 return(ESPIPE);
3094         }
3095
3096
3097         ctx = vfs_context_current();
3098 #if CONFIG_MACF
3099         if (uap->whence == L_INCR && uap->offset == 0)
3100                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3101                     fp->f_fglob);
3102         else
3103                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3104                     fp->f_fglob);
3105         if (error) {
3106                 file_drop(uap->fd);
3107                 return (error);
3108         }
3109 #endif
3110         if ( (error = vnode_getwithref(vp)) ) {
3111                 file_drop(uap->fd);
3112                 return(error);
3113         }
3114
3115         switch (uap->whence) {
3116         case L_INCR:
3117                 offset += fp->f_fglob->fg_offset;
3118                 break;
3119         case L_XTND:
3120                 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3121                         break;
3122                 offset += file_size;
3123                 break;
3124         case L_SET:
3125                 break;
3126         default:
3127                 error = EINVAL;
3128         }
3129         if (error == 0) {
3130                 if (uap->offset > 0 && offset < 0) {
3131                         /* Incremented/relative move past max size */
3132                         error = EOVERFLOW;
3133                 } else {
3134                         /*
3135                          * Allow negative offsets on character devices, per
3136                          * POSIX 1003.1-2001.  Most likely for writing disk
3137                          * labels.
3138                          */
3139                         if (offset < 0 && vp->v_type != VCHR) {
3140                                 /* Decremented/relative move before start */
3141                                 error = EINVAL;
3142                         } else {
3143                                 /* Success */
3144                                 fp->f_fglob->fg_offset = offset;
3145                                 *retval = fp->f_fglob->fg_offset;
3146                         }
3147                 }
3148         }
3149         (void)vnode_put(vp);
3150         file_drop(uap->fd);
3151         return (error);
3152 }
3153
3154
3155 /*
3156  * Check access permissions.
3157  *
3158  * Returns:     0                       Success
3159  *              vnode_authorize:???
3160  */
3161 static int
3162 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3163 {
3164         kauth_action_t action;
3165         int error;
3166
3167         /*
3168          * If just the regular access bits, convert them to something
3169          * that vnode_authorize will understand.
3170          */
3171         if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3172                 action = 0;
3173                 if (uflags & R_OK)
3174                         action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
3175                 if (uflags & W_OK) {
3176                         if (vnode_isdir(vp)) {
3177                                 action |= KAUTH_VNODE_ADD_FILE |
3178                                     KAUTH_VNODE_ADD_SUBDIRECTORY;
3179                                 /* might want delete rights here too */
3180                         } else {
3181                                 action |= KAUTH_VNODE_WRITE_DATA;
3182                         }
3183                 }
3184                 if (uflags & X_OK) {
3185                         if (vnode_isdir(vp)) {
3186                                 action |= KAUTH_VNODE_SEARCH;
3187                         } else {
3188                                 action |= KAUTH_VNODE_EXECUTE;
3189                         }
3190                 }
3191         } else {
3192                 /* take advantage of definition of uflags */
3193                 action = uflags >> 8;
3194         }
3195
3196 #if CONFIG_MACF
3197         error = mac_vnode_check_access(ctx, vp, uflags);
3198         if (error)
3199                 return (error);
3200 #endif /* MAC */
3201
3202         /* action == 0 means only check for existence */
3203         if (action != 0) {
3204                 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3205         } else {
3206                 error = 0;
3207         }
3208
3209         return(error);
3210 }
3211
3212
3213
3214 /*
3215  * access_extended
3216  *
3217  * Description: uap->entries                    Pointer to argument descriptor
3218  *              uap->size                       Size of the area pointed to by
3219  *                                              the descriptor
3220  *              uap->results                    Pointer to the results array
3221  *
3222  * Returns:     0                       Success
3223  *              ENOMEM                  Insufficient memory
3224  *              EINVAL                  Invalid arguments
3225  *              namei:EFAULT            Bad address
3226  *              namei:ENAMETOOLONG      Filename too long
3227  *              namei:ENOENT            No such file or directory
3228  *              namei:ELOOP             Too many levels of symbolic links
3229  *              namei:EBADF             Bad file descriptor
3230  *              namei:ENOTDIR           Not a directory
3231  *              namei:???
3232  *              access1:
3233  *
3234  * Implicit returns:
3235  *              uap->results            Array contents modified
3236  *
3237  * Notes:       The uap->entries are structured as an arbitrary length array
3238  *              of accessx descriptors, followed by one or more NUL terminated
3239  *              strings
3240  *
3241  *                      struct accessx_descriptor[0]
3242  *                      ...
3243  *                      struct accessx_descriptor[n]
3244  *                      char name_data[0];
3245  *
3246  *              We determine the entry count by walking the buffer containing
3247  *              the uap->entries argument descriptor.  For each descriptor we
3248  *              see, the valid values for the offset ad_name_offset will be
3249  *              in the byte range:
3250  *
3251  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
3252  *                                              to
3253  *                              [ uap->entries + uap->size - 2 ]
3254  *
3255  *              since we must have at least one string, and the string must
3256  *              be at least one character plus the NUL terminator in length.
3257  *
3258  * XXX:         Need to support the check-as uid argument
3259  */
3260 int
3261 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused register_t *retval)
3262 {
3263         struct accessx_descriptor *input = NULL;
3264         errno_t *result = NULL;
3265         errno_t error = 0;
3266         int wantdelete = 0;
3267         unsigned int desc_max, desc_actual, i, j;
3268         struct vfs_context context;
3269         struct nameidata nd;
3270         int niopts;
3271         vnode_t vp = NULL;
3272         vnode_t dvp = NULL;
3273 #define ACCESSX_MAX_DESCR_ON_STACK 10
3274         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3275
3276         context.vc_ucred = NULL;
3277
3278         /*
3279          * Validate parameters; if valid, copy the descriptor array and string
3280          * arguments into local memory.  Before proceeding, the following
3281          * conditions must have been met:
3282          *
3283          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3284          * o    There must be sufficient room in the request for at least one
3285          *      descriptor and a one byte NUL terminated string.
3286          * o    The allocation of local storage must not fail.
3287          */
3288         if (uap->size > ACCESSX_MAX_TABLESIZE)
3289                 return(ENOMEM);
3290         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3291                 return(EINVAL);
3292         if (uap->size <= sizeof (stack_input)) {
3293                 input = stack_input;
3294         } else {
3295         MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3296         if (input == NULL) {
3297                 error = ENOMEM;
3298                 goto out;
3299         }
3300         }
3301         error = copyin(uap->entries, input, uap->size);
3302         if (error)
3303                 goto out;
3304
3305         /*
3306          * Force NUL termination of the copyin buffer to avoid namei() running
3307          * off the end.  If the caller passes us bogus data, they may get a
3308          * bogus result.
3309          */
3310         ((char *)input)[uap->size - 1] = 0;
3311
3312         /*
3313          * Access is defined as checking against the process' real identity,
3314          * even if operations are checking the effective identity.  This
3315          * requires that we use a local vfs context.
3316          */
3317         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3318         context.vc_thread = current_thread();
3319
3320         /*
3321          * Find out how many entries we have, so we can allocate the result
3322          * array by walking the list and adjusting the count downward by the
3323          * earliest string offset we see.
3324          */
3325         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
3326         desc_actual = desc_max;
3327         for (i = 0; i < desc_actual; i++) {
3328                 /*
3329                  * Take the offset to the name string for this entry and
3330                  * convert to an input array index, which would be one off
3331                  * the end of the array if this entry was the lowest-addressed
3332                  * name string.
3333                  */
3334                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
3335
3336                 /*
3337                  * An offset greater than the max allowable offset is an error.
3338                  * It is also an error for any valid entry to point
3339                  * to a location prior to the end of the current entry, if
3340                  * it's not a reference to the string of the previous entry.
3341                  */
3342                 if (j > desc_max || (j != 0 && j <= i)) {
3343                         error = EINVAL;
3344                         goto out;
3345                 }
3346
3347                 /*
3348                  * An offset of 0 means use the previous descriptor's offset;
3349                  * this is used to chain multiple requests for the same file
3350                  * to avoid multiple lookups.
3351                  */
3352                 if (j == 0) {
3353                         /* This is not valid for the first entry */
3354                         if (i == 0) {
3355                                 error = EINVAL;
3356                                 goto out;
3357                         }
3358                         continue;
3359                 }
3360
3361                 /*
3362                  * If the offset of the string for this descriptor is before
3363                  * what we believe is the current actual last descriptor,
3364                  * then we need to adjust our estimate downward; this permits
3365                  * the string table following the last descriptor to be out
3366                  * of order relative to the descriptor list.
3367                  */
3368                 if (j < desc_actual)
3369                         desc_actual = j;
3370         }
3371
	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested exceeds this limit, the request fails with ENOMEM;
	 * otherwise we allocate one result slot per descriptor.
	 */
3377         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
3378                 error = ENOMEM;
3379                 goto out;
3380         }
3381         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
3382         if (result == NULL) {
3383                 error = ENOMEM;
3384                 goto out;
3385         }
3386
3387         /*
3388          * Do the work by iterating over the descriptor entries we know to
3389          * at least appear to contain valid data.
3390          */
3391         error = 0;
3392         for (i = 0; i < desc_actual; i++) {
3393                 /*
3394                  * If the ad_name_offset is 0, then we use the previous
3395                  * results to make the check; otherwise, we are looking up
3396                  * a new file name.
3397                  */
3398                 if (input[i].ad_name_offset != 0) {
3399                         /* discard old vnodes */
3400                         if (vp) {
3401                                 vnode_put(vp);
3402                                 vp = NULL;
3403                         }
3404                         if (dvp) {
3405                                 vnode_put(dvp);
3406                                 dvp = NULL;
3407                         }
3408
3409                         /*
3410                          * Scan forward in the descriptor list to see if we
3411                          * need the parent vnode.  We will need it if we are
3412                          * deleting, since we must have rights  to remove
3413                          * entries in the parent directory, as well as the
3414                          * rights to delete the object itself.
3415                          */
3416                         wantdelete = input[i].ad_flags & _DELETE_OK;
3417                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
3418                                 if (input[j].ad_flags & _DELETE_OK)
3419                                         wantdelete = 1;
3420
3421                         niopts = FOLLOW | AUDITVNPATH1;
3422
3423                         /* need parent for vnode_authorize for deletion test */
3424                         if (wantdelete)
3425                                 niopts |= WANTPARENT;
3426
3427                         /* do the lookup */
3428                         NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
3429                         error = namei(&nd);
3430                         if (!error) {
3431                                 vp = nd.ni_vp;
3432                                 if (wantdelete)
3433                                         dvp = nd.ni_dvp;
3434                         }
3435                         nameidone(&nd);
3436                 }
3437
3438                 /*
3439                  * Handle lookup errors.
3440                  */
3441                 switch(error) {
3442                 case ENOENT:
3443                 case EACCES:
3444                 case EPERM:
3445                 case ENOTDIR:
3446                         result[i] = error;
3447                         break;
3448                 case 0:
3449                         /* run this access check */
3450                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
3451                         break;
3452                 default:
3453                         /* fatal lookup error */
3454
3455                         goto out;
3456                 }
3457         }
3458
3459         /* copy out results */
3460         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
3461
3462 out:
3463         if (input && input != stack_input)
3464                 FREE(input, M_TEMP);
3465         if (result)
3466                 FREE(result, M_TEMP);
3467         if (vp)
3468                 vnode_put(vp);
3469         if (dvp)
3470                 vnode_put(dvp);
3471         if (IS_VALID_CRED(context.vc_ucred))
3472                 kauth_cred_unref(&context.vc_ucred);
3473         return(error);
3474 }
3475
3476
3477 /*
3478  * Returns:     0                       Success
3479  *              namei:EFAULT            Bad address
3480  *              namei:ENAMETOOLONG      Filename too long
3481  *              namei:ENOENT            No such file or directory
3482  *              namei:ELOOP             Too many levels of symbolic links
3483  *              namei:EBADF             Bad file descriptor
3484  *              namei:ENOTDIR           Not a directory
3485  *              namei:???
3486  *              access1:
3487  */
3488 int
3489 access(__unused proc_t p, struct access_args *uap, __unused register_t *retval)
3490 {
3491         int error;
3492         struct nameidata nd;
3493         int niopts;
3494         struct vfs_context context;
3495
3496 #if NAMEDRSRCFORK
3497         int is_namedstream = 0;
3498 #endif
3499
3500         /*
3501          * Access is defined as checking against the process'
3502          * real identity, even if operations are checking the
3503          * effective identity.  So we need to tweak the credential
3504          * in the context.
3505          */
3506         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3507         context.vc_thread = current_thread();
3508
3509         niopts = FOLLOW | AUDITVNPATH1;
3510         /* need parent for vnode_authorize for deletion test */
3511         if (uap->flags & _DELETE_OK)
3512                 niopts |= WANTPARENT;
3513         NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
3514
3515 #if NAMEDRSRCFORK
3516         /* access(F_OK) calls are allowed for resource forks. */
3517         if (uap->flags == F_OK)
3518                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3519 #endif
3520         error = namei(&nd);
3521         if (error)
3522                 goto out;
3523
3524 #if NAMEDRSRCFORK
3525         /* Grab reference on the shadow stream file vnode to
3526          * force an inactive on release which will mark it for
3527          * recycle
3528          */
3529         if (vnode_isnamedstream(nd.ni_vp) &&
3530                         (nd.ni_vp->v_parent != NULLVP) &&
3531                         ((nd.ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) {
3532                 is_namedstream = 1;
3533                 vnode_ref(nd.ni_vp);
3534         }
3535 #endif
3536
3537         error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
3538
3539 #if NAMEDRSRCFORK
3540         if (is_namedstream) {
3541                 vnode_rele(nd.ni_vp);
3542         }
3543 #endif
3544
3545         vnode_put(nd.ni_vp);
3546         if (uap->flags & _DELETE_OK)
3547                 vnode_put(nd.ni_dvp);
3548         nameidone(&nd);
3549
3550 out:
3551         kauth_cred_unref(&context.vc_ucred);
3552         return(error);
3553 }
3554
3555
3556 /*
3557  * Returns:     0                       Success
3558  *              EFAULT
3559  *      copyout:EFAULT
3560  *      namei:???
3561  *      vn_stat:???
3562  */
3563 static int
3564 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3565 {
3566         struct stat sb;
3567         struct stat64 sb64;
3568         struct user_stat user_sb;
3569         struct user_stat64 user_sb64;
3570         caddr_t sbp;
3571         int error, my_size;
3572         kauth_filesec_t fsec;
3573         size_t xsecurity_bufsize;
3574         void * statptr;
3575
3576 #if NAMEDRSRCFORK
3577         int is_namedstream = 0;
3578         /* stat calls are allowed for resource forks. */
3579         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3580 #endif
3581         error = namei(ndp);
3582         if (error)
3583                 return (error);
3584         fsec = KAUTH_FILESEC_NONE;
3585         if (isstat64 != 0)
3586                 statptr  = (void *)&sb64;
3587         else
3588                 statptr  = (void *)&sb;
3589
3590 #if NAMEDRSRCFORK
3591         /* Grab reference on the shadow stream file vnode to
3592          * force an inactive on release which will mark it for
3593          * recycle.
3594          */
3595         if (vnode_isnamedstream(ndp->ni_vp) &&
3596                         (ndp->ni_vp->v_parent != NULLVP) &&
3597                         ((ndp->ni_vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0)) {
3598                 is_namedstream = 1;
3599                 vnode_ref (ndp->ni_vp);
3600         }
3601 #endif
3602
3603         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
3604
3605 #if NAMEDRSRCFORK
3606         if (is_namedstream) {
3607                 vnode_rele (ndp->ni_vp);
3608         }
3609 #endif
3610
3611         vnode_put(ndp->ni_vp);
3612         nameidone(ndp);
3613
3614         if (error)
3615                 return (error);
3616         /* Zap spare fields */
3617         if (isstat64 != 0) {
3618                 sb64.st_lspare = 0;
3619                 sb64.st_qspare[0] = 0LL;
3620                 sb64.st_qspare[1] = 0LL;
3621                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3622                         munge_stat64(&sb64, &user_sb64);
3623                         my_size = sizeof(user_sb64);
3624                         sbp = (caddr_t)&user_sb64;
3625                 } else {
3626                         my_size = sizeof(sb64);
3627                         sbp = (caddr_t)&sb64;
3628                 }
3629                 /*
3630                  * Check if we raced (post lookup) against the last unlink of a file.
3631                  */
3632                 if ((sb64.st_nlink == 0) && S_ISREG(sb64.st_mode)) {
3633                         sb64.st_nlink = 1;
3634                 }
3635         } else {
3636                 sb.st_lspare = 0;
3637                 sb.st_qspare[0] = 0LL;
3638                 sb.st_qspare[1] = 0LL;
3639                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3640                         munge_stat(&sb, &user_sb);
3641                         my_size = sizeof(user_sb);
3642                         sbp = (caddr_t)&user_sb;
3643                 } else {
3644                         my_size = sizeof(sb);
3645                         sbp = (caddr_t)&sb;
3646                 }
3647
3648                 /*
3649                  * Check if we raced (post lookup) against the last unlink of a file.
3650                  */
3651                 if ((sb.st_nlink == 0) && S_ISREG(sb.st_mode)) {
3652                         sb.st_nlink = 1;
3653                 }
3654         }
3655         if ((error = copyout(sbp, ub, my_size)) != 0)
3656                 goto out;
3657
3658         /* caller wants extended security information? */
3659         if (xsecurity != USER_ADDR_NULL) {
3660
3661                 /* did we get any? */
3662                 if (fsec == KAUTH_FILESEC_NONE) {
3663                         if (susize(xsecurity_size, 0) != 0) {
3664                                 error = EFAULT;
3665                                 goto out;
3666                         }
3667                 } else {
3668                         /* find the user buffer size */
3669                         xsecurity_bufsize = fusize(xsecurity_size);
3670
3671                         /* copy out the actual data size */
3672                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3673                                 error = EFAULT;
3674                                 goto out;
3675                         }
3676
3677                         /* if the caller supplied enough room, copy out to it */
3678                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3679                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3680                 }
3681         }
3682 out:
3683         if (fsec != KAUTH_FILESEC_NONE)
3684                 kauth_filesec_free(fsec);
3685         return (error);
3686 }
3687
3688 /*
3689  * Get file status; this version follows links.
3690  *
3691  * Returns:     0                       Success
3692  *      stat2:???                       [see stat2() in this file]
3693  */
3694 static int
3695 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3696 {
3697         struct nameidata nd;
3698         vfs_context_t ctx = vfs_context_current();
3699
3700         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
3701             UIO_USERSPACE, path, ctx);
3702         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3703 }
3704
3705 int
3706 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused register_t *retval)
3707 {
3708         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3709 }
3710
3711 /*
3712  * Returns:     0                       Success
3713  *      stat1:???                       [see stat1() in this file]
3714  */
3715 int
3716 stat(__unused proc_t p, struct stat_args *uap, __unused register_t *retval)
3717 {
3718         return(stat1(uap->path, uap->ub, 0, 0, 0));
3719 }
3720
3721 int
3722 stat64(__unused proc_t p, struct stat64_args *uap, __unused register_t *retval)
3723 {
3724         return(stat1(uap->path, uap->ub, 0, 0, 1));
3725 }
3726
3727 int
3728 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused register_t *retval)
3729 {
3730         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3731 }
3732 /*
3733  * Get file status; this version does not follow links.
3734  */
3735 static int
3736 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3737 {
3738         struct nameidata nd;
3739         vfs_context_t ctx = vfs_context_current();
3740
3741         NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
3742             UIO_USERSPACE, path, ctx);
3743
3744         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3745 }
3746
3747 int
3748 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused register_t *retval)
3749 {
3750         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3751 }
3752
3753 int
3754 lstat(__unused proc_t p, struct lstat_args *uap, __unused register_t *retval)
3755 {
3756         return(lstat1(uap->path, uap->ub, 0, 0, 0));
3757 }
3758 int
3759 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused register_t *retval)
3760 {
3761         return(lstat1(uap->path, uap->ub, 0, 0, 1));
3762 }
3763
3764 int
3765 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused register_t *retval)
3766 {
3767         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3768 }
3769
3770 /*
3771  * Get configurable pathname variables.
3772  *
3773  * Returns:     0                       Success
3774  *      namei:???
3775  *      vn_pathconf:???
3776  *
3777  * Notes:       Global implementation  constants are intended to be
3778  *              implemented in this function directly; all other constants
3779  *              are per-FS implementation, and therefore must be handled in
3780  *              each respective FS, instead.
3781  *
3782  * XXX We implement some things globally right now that should actually be
3783  * XXX per-FS; we will need to deal with this at some point.
3784  */
3785 /* ARGSUSED */
3786 int
3787 pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval)
3788 {
3789         int error;
3790         struct nameidata nd;
3791         vfs_context_t ctx = vfs_context_current();
3792
3793         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3794                 UIO_USERSPACE, uap->path, ctx);
3795         error = namei(&nd);
3796         if (error)
3797                 return (error);
3798
3799         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
3800
3801         vnode_put(nd.ni_vp);
3802         nameidone(&nd);
3803         return (error);
3804 }
3805
3806 /*
3807  * Return target name of a symbolic link.
3808  */
3809 /* ARGSUSED */
3810 int
3811 readlink(proc_t p, struct readlink_args *uap, register_t *retval)
3812 {
3813         vnode_t vp;
3814         uio_t auio;
3815         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3816         int error;
3817         struct nameidata nd;
3818         vfs_context_t ctx = vfs_context_current();
3819         char uio_buf[ UIO_SIZEOF(1) ];
3820
3821         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
3822                 UIO_USERSPACE, uap->path, ctx);
3823         error = namei(&nd);
3824         if (error)
3825                 return (error);
3826         vp = nd.ni_vp;
3827
3828         nameidone(&nd);
3829
3830         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
3831                                                                   &uio_buf[0], sizeof(uio_buf));
3832         uio_addiov(auio, uap->buf, uap->count);
3833         if (vp->v_type != VLNK)
3834                 error = EINVAL;
3835         else {
3836 #if CONFIG_MACF
3837                 error = mac_vnode_check_readlink(ctx,
3838                     vp);
3839 #endif
3840                 if (error == 0)
3841                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
3842                 if (error == 0)
3843                         error = VNOP_READLINK(vp, auio, ctx);
3844         }
3845         vnode_put(vp);
3846         // LP64todo - fix this
3847         *retval = uap->count - (int)uio_resid(auio);
3848         return (error);
3849 }
3850
3851 /*
3852  * Change file flags.
3853  */
3854 static int
3855 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
3856 {
3857         struct vnode_attr va;
3858         kauth_action_t action;
3859         int error;
3860
3861         VATTR_INIT(&va);
3862         VATTR_SET(&va, va_flags, flags);
3863
3864 #if CONFIG_MACF
3865         error = mac_vnode_check_setflags(ctx, vp, flags);
3866         if (error)
3867                 goto out;
3868 #endif
3869
3870         /* request authorisation, disregard immutability */
3871         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
3872                 goto out;
3873         /*
3874          * Request that the auth layer disregard those file flags it's allowed to when
3875          * authorizing this operation; we need to do this in order to be able to
3876          * clear immutable flags.
3877          */
3878         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
3879                 goto out;
3880         error = vnode_setattr(vp, &va, ctx);
3881
3882         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
3883                 error = ENOTSUP;
3884         }
3885 out:
3886         vnode_put(vp);
3887         return(error);
3888 }
3889
3890 /*
3891  * Change flags of a file given a path name.
3892  */
3893 /* ARGSUSED */
3894 int
3895 chflags(__unused proc_t p, struct chflags_args *uap, __unused register_t *retval)
3896 {
3897         vnode_t vp;
3898         vfs_context_t ctx = vfs_context_current();
3899         int error;
3900         struct nameidata nd;
3901
3902         AUDIT_ARG(fflags, uap->flags);
3903         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3904                 UIO_USERSPACE, uap->path, ctx);
3905         error = namei(&nd);
3906         if (error)
3907                 return (error);
3908         vp = nd.ni_vp;
3909         nameidone(&nd);
3910
3911         error = chflags1(vp, uap->flags, ctx);
3912
3913         return(error);
3914 }
3915
3916 /*
3917  * Change flags of a file given a file descriptor.
3918  */
3919 /* ARGSUSED */
3920 int
3921 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused register_t *retval)
3922 {
3923         vnode_t vp;
3924         int error;
3925
3926         AUDIT_ARG(fd, uap->fd);
3927         AUDIT_ARG(fflags, uap->flags);
3928         if ( (error = file_vnode(uap->fd, &vp)) )
3929                 return (error);
3930
3931         if ((error = vnode_getwithref(vp))) {
3932                 file_drop(uap->fd);
3933                 return(error);
3934         }
3935
3936         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3937
3938         error = chflags1(vp, uap->flags, vfs_context_current());
3939
3940         file_drop(uap->fd);
3941         return (error);
3942 }
3943
3944 /*
3945  * Change security information on a filesystem object.
3946  *
3947  * Returns:     0                       Success
3948  *              EPERM                   Operation not permitted
3949  *              vnode_authattr:???      [anything vnode_authattr can return]
3950  *              vnode_authorize:???     [anything vnode_authorize can return]
3951  *              vnode_setattr:???       [anything vnode_setattr can return]
3952  *
3953  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
3954  *              translated to EPERM before being returned.
3955  */
3956 static int
3957 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
3958 {
3959         kauth_action_t action;
3960         int error;
3961
3962         AUDIT_ARG(mode, (mode_t)vap->va_mode);
3963 #warning XXX audit new args
3964
3965 #if NAMEDSTREAMS
3966         /* chmod calls are not allowed for resource forks. */
3967         if (vp->v_flag & VISNAMEDSTREAM) {
3968                 return (EPERM);
3969         }
3970 #endif
3971
3972 #if CONFIG_MACF
3973         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
3974         if (error)
3975                 return (error);
3976 #endif
3977
3978         /* make sure that the caller is allowed to set this security information */
3979         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
3980             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
3981                 if (error == EACCES)
3982                         error = EPERM;
3983                 return(error);
3984         }
3985
3986         error = vnode_setattr(vp, vap, ctx);
3987
3988         return (error);
3989 }
3990
3991
3992 /*
3993  * Change mode of a file given path name.
3994  *
3995  * Returns:     0                       Success
3996  *              namei:???               [anything namei can return]
3997  *              chmod2:???              [anything chmod2 can return]
3998  */
3999 static int
4000 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4001 {
4002         struct nameidata nd;
4003         int error;
4004
4005         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4006                 UIO_USERSPACE, path, ctx);
4007         if ((error = namei(&nd)))
4008                 return (error);
4009         error = chmod2(ctx, nd.ni_vp, vap);
4010         vnode_put(nd.ni_vp);
4011         nameidone(&nd);
4012         return(error);
4013 }
4014
4015 /*
4016  * A chmod system call using an extended argument list compared to the regular
4017  * system call 'mkfifo'.
4018  *
4019  * Parameters:  p                       Process requesting the open
4020  *              uap                     User argument descriptor (see below)
4021  *              retval                  (ignored)
4022  *
4023  * Indirect:    uap->path               Path to object (same as 'chmod')
4024  *              uap->uid                UID to set
4025  *              uap->gid                GID to set
4026  *              uap->mode               File mode to set (same as 'chmod')
4027  *              uap->xsecurity          ACL to set (or delete)
4028  *
4029  * Returns:     0                       Success
4030  *              !0                      errno value
4031  *
4032  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
4033  *
4034  * XXX:         We should enummerate the possible errno values here, and where
4035  *              in the code they originated.
4036  */
4037 int
4038 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused register_t *retval)
4039 {
4040         int error;
4041         struct vnode_attr va;
4042         kauth_filesec_t xsecdst;
4043
4044         VATTR_INIT(&va);
4045         if (uap->mode != -1)
4046                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4047         if (uap->uid != KAUTH_UID_NONE)
4048                 VATTR_SET(&va, va_uid, uap->uid);
4049         if (uap->gid != KAUTH_GID_NONE)
4050                 VATTR_SET(&va, va_gid, uap->gid);
4051
4052         xsecdst = NULL;
4053         switch(uap->xsecurity) {
4054                 /* explicit remove request */
4055         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
4056                 VATTR_SET(&va, va_acl, NULL);
4057                 break;
4058                 /* not being set */
4059         case USER_ADDR_NULL:
4060                 break;
4061         default:
4062                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4063                         return(error);
4064                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4065                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4066         }
4067
4068         error = chmod1(vfs_context_current(), uap->path, &va);
4069
4070         if (xsecdst != NULL)
4071                 kauth_filesec_free(xsecdst);
4072         return(error);
4073 }
4074
4075 /*
4076  * Returns:     0                       Success
4077  *              chmod1:???              [anything chmod1 can return]
4078  */
4079 int
4080 chmod(__unused proc_t p, struct chmod_args *uap, __unused register_t *retval)
4081 {
4082         struct vnode_attr va;
4083
4084         VATTR_INIT(&va);
4085         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4086
4087         return(chmod1(vfs_context_current(), uap->path, &va));
4088 }
4089
4090 /*
4091  * Change mode of a file given a file descriptor.
4092  */
4093 static int
4094 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4095 {
4096         vnode_t vp;
4097         int error;
4098
4099         AUDIT_ARG(fd, fd);
4100
4101         if ((error = file_vnode(fd, &vp)) != 0)
4102                 return (error);
4103         if ((error = vnode_getwithref(vp)) != 0) {
4104                 file_drop(fd);
4105                 return(error);
4106         }
4107         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4108
4109         error = chmod2(vfs_context_current(), vp, vap);
4110         (void)vnode_put(vp);
4111         file_drop(fd);
4112
4113         return (error);
4114 }
4115
4116 int
4117 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t *retval)
4118 {
4119         int error;
4120         struct vnode_attr va;
4121         kauth_filesec_t xsecdst;
4122
4123         VATTR_INIT(&va);
4124         if (uap->mode != -1)
4125                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4126         if (uap->uid != KAUTH_UID_NONE)
4127                 VATTR_SET(&va, va_uid, uap->uid);
4128         if (uap->gid != KAUTH_GID_NONE)
4129                 VATTR_SET(&va, va_gid, uap->gid);
4130
4131         xsecdst = NULL;
4132         switch(uap->xsecurity) {
4133         case USER_ADDR_NULL:
4134                 VATTR_SET(&va, va_acl, NULL);
4135                 break;
4136         case CAST_USER_ADDR_T(-1):
4137                 break;
4138         default:
4139                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4140                         return(error);
4141                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4142         }
4143
4144         error = fchmod1(p, uap->fd, &va);
4145
4146
4147         switch(uap->xsecurity) {
4148         case USER_ADDR_NULL:
4149         case CAST_USER_ADDR_T(-1):
4150                 break;
4151         default:
4152                 if (xsecdst != NULL)
4153                         kauth_filesec_free(xsecdst);
4154         }
4155         return(error);
4156 }
4157
4158 int
4159 fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval)
4160 {
4161         struct vnode_attr va;
4162
4163         VATTR_INIT(&va);
4164         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4165
4166         return(fchmod1(p, uap->fd, &va));
4167 }
4168
4169
4170 /*
4171  * Set ownership given a path name.
4172  */
4173 /* ARGSUSED */
4174 static int
4175 chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, int follow)
4176 {
4177         vnode_t vp;
4178         struct vnode_attr va;
4179         int error;
4180         struct nameidata nd;
4181         kauth_action_t action;
4182
4183         AUDIT_ARG(owner, uap->uid, uap->gid);
4184
4185         NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4186                 UIO_USERSPACE, uap->path, ctx);
4187         error = namei(&nd);
4188         if (error)
4189                 return (error);
4190         vp = nd.ni_vp;
4191
4192         nameidone(&nd);
4193
4194         VATTR_INIT(&va);
4195         if (uap->uid != VNOVAL)
4196                 VATTR_SET(&va, va_uid, uap->uid);
4197         if (uap->gid != VNOVAL)
4198                 VATTR_SET(&va, va_gid, uap->gid);
4199
4200 #if CONFIG_MACF
4201         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4202         if (error)
4203                 goto out;
4204 #endif
4205
4206         /* preflight and authorize attribute changes */
4207         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4208                 goto out;
4209         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4210                 goto out;
4211         error = vnode_setattr(vp, &va, ctx);
4212
4213 out:
4214         /*
4215          * EACCES is only allowed from namei(); permissions failure should
4216          * return EPERM, so we need to translate the error code.
4217          */
4218         if (error == EACCES)
4219                 error = EPERM;
4220
4221         vnode_put(vp);
4222         return (error);
4223 }
4224
4225 int
4226 chown(__unused proc_t p, struct chown_args *uap, register_t *retval)
4227 {
4228         return chown1(vfs_context_current(), uap, retval, 1);
4229 }
4230
4231 int
4232 lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval)
4233 {
4234         /* Argument list identical, but machine generated; cast for chown1() */
4235         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
4236 }
4237
4238 /*
4239  * Set ownership given a file descriptor.
4240  */
4241 /* ARGSUSED */
4242 int
4243 fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval)
4244 {
4245         struct vnode_attr va;
4246         vfs_context_t ctx = vfs_context_current();
4247         vnode_t vp;
4248         int error;
4249         kauth_action_t action;
4250
4251         AUDIT_ARG(owner, uap->uid, uap->gid);
4252         AUDIT_ARG(fd, uap->fd);
4253
4254         if ( (error = file_vnode(uap->fd, &vp)) )
4255                 return (error);
4256
4257         if ( (error = vnode_getwithref(vp)) ) {
4258                 file_drop(uap->fd);
4259                 return(error);
4260         }
4261         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4262
4263         VATTR_INIT(&va);
4264         if (uap->uid != VNOVAL)
4265                 VATTR_SET(&va, va_uid, uap->uid);
4266         if (uap->gid != VNOVAL)
4267                 VATTR_SET(&va, va_gid, uap->gid);
4268
4269 #if NAMEDSTREAMS
4270         /* chown calls are not allowed for resource forks. */
4271         if (vp->v_flag & VISNAMEDSTREAM) {
4272                 error = EPERM;
4273                 goto out;
4274         }
4275 #endif
4276
4277 #if CONFIG_MACF
4278         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4279         if (error)
4280                 goto out;
4281 #endif
4282
4283         /* preflight and authorize attribute changes */
4284         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4285                 goto out;
4286         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4287                 if (error == EACCES)
4288                         error = EPERM;
4289                 goto out;
4290         }
4291         error = vnode_setattr(vp, &va, ctx);
4292
4293 out:
4294         (void)vnode_put(vp);
4295         file_drop(uap->fd);
4296         return (error);
4297 }
4298
4299 static int
4300 getutimes(user_addr_t usrtvp, struct timespec *tsp)
4301 {
4302         struct user_timeval tv[2];
4303         int error;
4304
4305         if (usrtvp == USER_ADDR_NULL) {
4306                 struct timeval old_tv;
4307                 /* XXX Y2038 bug because of microtime argument */
4308                 microtime(&old_tv);
4309                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
4310                 tsp[1] = tsp[0];
4311         } else {
4312                 if (IS_64BIT_PROCESS(current_proc())) {
4313                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
4314                 } else {
4315                         struct timeval old_tv[2];
4316                         error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv));
4317                         tv[0].tv_sec = old_tv[0].tv_sec;
4318                         tv[0].tv_usec = old_tv[0].tv_usec;
4319                         tv[1].tv_sec = old_tv[1].tv_sec;
4320                         tv[1].tv_usec = old_tv[1].tv_usec;
4321                 }
4322                 if (error)
4323                         return (error);
4324                 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4325                 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4326         }
4327         return 0;
4328 }
4329
4330 static int
4331 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
4332         int nullflag)
4333 {
4334         int error;
4335         struct vnode_attr va;
4336         kauth_action_t action;
4337
4338         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4339
4340         VATTR_INIT(&va);
4341         VATTR_SET(&va, va_access_time, ts[0]);
4342         VATTR_SET(&va, va_modify_time, ts[1]);
4343         if (nullflag)
4344                 va.va_vaflags |= VA_UTIMES_NULL;
4345
4346 #if NAMEDSTREAMS
4347         /* utimes calls are not allowed for resource forks. */
4348         if (vp->v_flag & VISNAMEDSTREAM) {
4349                 error = EPERM;
4350                 goto out;
4351         }
4352 #endif
4353
4354 #if CONFIG_MACF
4355         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
4356         if (error)
4357                 goto out;
4358 #endif
4359         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
4360                 if (!nullflag && error == EACCES)
4361                         error = EPERM;
4362                 goto out;
4363         }
4364
4365         /* since we may not need to auth anything, check here */
4366         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4367                 if (!nullflag && error == EACCES)
4368                         error = EPERM;
4369                 goto out;
4370         }
4371         error = vnode_setattr(vp, &va, ctx);
4372
4373 out:
4374         return error;
4375 }
4376
4377 /*
4378  * Set the access and modification times of a file.
4379  */
4380 /* ARGSUSED */
4381 int
4382 utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval)
4383 {
4384         struct timespec ts[2];
4385         user_addr_t usrtvp;
4386         int error;
4387         struct nameidata nd;
4388         vfs_context_t ctx = vfs_context_current();
4389
4390         /*
4391          * AUDIT: Needed to change the order of operations to do the
4392          * name lookup first because auditing wants the path.
4393          */
4394         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4395                 UIO_USERSPACE, uap->path, ctx);
4396         error = namei(&nd);
4397         if (error)
4398                 return (error);
4399         nameidone(&nd);
4400
4401         /*
4402          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
4403          * the current time instead.
4404          */
4405         usrtvp = uap->tptr;
4406         if ((error = getutimes(usrtvp, ts)) != 0)
4407                 goto out;
4408
4409         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
4410
4411 out:
4412         vnode_put(nd.ni_vp);
4413         return (error);
4414 }
4415
4416 /*
4417  * Set the access and modification times of a file.
4418  */
4419 /* ARGSUSED */
4420 int
4421 futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval)
4422 {
4423         struct timespec ts[2];
4424         vnode_t vp;
4425         user_addr_t usrtvp;
4426         int error;
4427
4428         AUDIT_ARG(fd, uap->fd);
4429         usrtvp = uap->tptr;
4430         if ((error = getutimes(usrtvp, ts)) != 0)
4431                 return (error);
4432         if ((error = file_vnode(uap->fd, &vp)) != 0)
4433                 return (error);
4434         if((error = vnode_getwithref(vp))) {
4435                 file_drop(uap->fd);
4436                 return(error);
4437         }
4438
4439         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
4440         vnode_put(vp);
4441         file_drop(uap->fd);
4442         return(error);
4443 }
4444
4445 /*
4446  * Truncate a file given its path name.
4447  */
4448 /* ARGSUSED */
4449 int
4450 truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retval)
4451 {
4452         vnode_t vp;
4453         struct vnode_attr va;
4454         vfs_context_t ctx = vfs_context_current();
4455         int error;
4456         struct nameidata nd;
4457         kauth_action_t action;
4458
4459         if (uap->length < 0)
4460                 return(EINVAL);
4461         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4462                 UIO_USERSPACE, uap->path, ctx);
4463         if ((error = namei(&nd)))
4464                 return (error);
4465         vp = nd.ni_vp;
4466
4467         nameidone(&nd);
4468
4469         VATTR_INIT(&va);
4470         VATTR_SET(&va, va_data_size, uap->length);
4471
4472 #if CONFIG_MACF
4473         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
4474         if (error)
4475                 goto out;
4476 #endif
4477
4478         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4479                 goto out;
4480         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4481                 goto out;
4482         error = vnode_setattr(vp, &va, ctx);
4483 out:
4484         vnode_put(vp);
4485         return (error);
4486 }
4487
4488 /*
4489  * Truncate a file given a file descriptor.
4490  */
4491 /* ARGSUSED */
4492 int
4493 ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval)
4494 {
4495         vfs_context_t ctx = vfs_context_current();
4496         struct vnode_attr va;
4497         vnode_t vp;
4498         struct fileproc *fp;
4499         int error ;
4500         int fd = uap->fd;
4501
4502         AUDIT_ARG(fd, uap->fd);
4503         if (uap->length < 0)
4504                 return(EINVAL);
4505
4506         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
4507                 return(error);
4508         }
4509
4510         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
4511                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
4512                 goto out;
4513         }
4514         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
4515                 error = EINVAL;
4516                 goto out;
4517         }
4518
4519         vp = (vnode_t)fp->f_fglob->fg_data;
4520
4521         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
4522                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
4523                 error = EINVAL;
4524                 goto out;
4525         }
4526
4527         if ((error = vnode_getwithref(vp)) != 0) {
4528                 goto out;
4529         }
4530
4531         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4532
4533 #if CONFIG_MACF
4534         error = mac_vnode_check_truncate(ctx,
4535             fp->f_fglob->fg_cred, vp);
4536         if (error) {
4537                 (void)vnode_put(vp);
4538                 goto out;
4539         }
4540 #endif
4541         VATTR_INIT(&va);
4542         VATTR_SET(&va, va_data_size, uap->length);
4543         error = vnode_setattr(vp, &va, ctx);
4544         (void)vnode_put(vp);
4545 out:
4546         file_drop(fd);
4547         return (error);
4548 }
4549
4550
4551 /*
4552  * Sync an open file.
4553  */
4554 /* ARGSUSED */
4555 int
4556 fsync(proc_t p, struct fsync_args *uap, register_t *retval)
4557 {
4558         __pthread_testcancel(1);
4559         return(fsync_nocancel(p, (struct fsync_nocancel_args *)uap, retval));
4560 }
4561
4562 int
4563 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *retval)
4564 {
4565         vnode_t vp;
4566         struct fileproc *fp;
4567         vfs_context_t ctx = vfs_context_current();
4568         int error;
4569
4570         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
4571                 return (error);
4572         if ( (error = vnode_getwithref(vp)) ) {
4573                 file_drop(uap->fd);
4574                 return(error);
4575         }
4576
4577         error = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4578
4579 #if NAMEDRSRCFORK
4580         /* Sync resource fork shadow file if necessary. */
4581         if ((error == 0) &&
4582             (vp->v_flag & VISNAMEDSTREAM) &&
4583             (vp->v_parent != NULLVP) &&
4584             !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) &&
4585             (fp->f_flags & FP_WRITTEN)) {
4586                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
4587         }
4588 #endif
4589
4590         (void)vnode_put(vp);
4591         file_drop(uap->fd);
4592         return (error);
4593 }
4594
4595 /*
4596  * Duplicate files.  Source must be a file, target must be a file or
4597  * must not exist.
4598  *
4599  * XXX Copyfile authorisation checking is woefully inadequate, and will not
4600  *     perform inheritance correctly.
4601  */
4602 /* ARGSUSED */
4603 int
4604 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retval)
4605 {
4606         vnode_t tvp, fvp, tdvp, sdvp;
4607         struct nameidata fromnd, tond;
4608         int error;
4609         vfs_context_t ctx = vfs_context_current();
4610
4611         /* Check that the flags are valid. */
4612
4613         if (uap->flags & ~CPF_MASK) {
4614                 return(EINVAL);
4615         }
4616
4617         NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
4618                 UIO_USERSPACE, uap->from, ctx);
4619         if ((error = namei(&fromnd)))
4620                 return (error);
4621         fvp = fromnd.ni_vp;
4622
4623         NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
4624             UIO_USERSPACE, uap->to, ctx);
4625         if ((error = namei(&tond))) {
4626                 goto out1;
4627         }
4628         tdvp = tond.ni_dvp;
4629         tvp = tond.ni_vp;
4630
4631         if (tvp != NULL) {
4632                 if (!(uap->flags & CPF_OVERWRITE)) {
4633                         error = EEXIST;
4634                         goto out;
4635                 }
4636         }
4637         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
4638                 error = EISDIR;
4639                 goto out;
4640         }
4641
4642         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4643                 goto out;
4644
4645         if (fvp == tdvp)
4646                 error = EINVAL;
4647         /*
4648          * If source is the same as the destination (that is the
4649          * same inode number) then there is nothing to do.
4650          * (fixed to have POSIX semantics - CSM 3/2/98)
4651          */
4652         if (fvp == tvp)
4653                 error = -1;
4654         if (!error)
4655                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
4656 out:
4657         sdvp = tond.ni_startdir;
4658         /*
4659          * nameidone has to happen before we vnode_put(tdvp)
4660          * since it may need to release the fs_nodelock on the tdvp
4661          */
4662         nameidone(&tond);
4663
4664         if (tvp)
4665                 vnode_put(tvp);
4666         vnode_put(tdvp);
4667         vnode_put(sdvp);
4668 out1:
4669         vnode_put(fvp);
4670
4671         if (fromnd.ni_startdir)
4672                 vnode_put(fromnd.ni_startdir);
4673         nameidone(&fromnd);
4674
4675         if (error == -1)
4676                 return (0);
4677         return (error);
4678 }
4679
4680
4681 /*
4682  * Rename files.  Source and destination must either both be directories,
4683  * or both not be directories.  If target is a directory, it must be empty.
4684  */
4685 /* ARGSUSED */
4686 int
4687 rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval)
4688 {
4689         vnode_t tvp, tdvp;
4690         vnode_t fvp, fdvp;
4691         struct nameidata fromnd, tond;
4692         vfs_context_t ctx = vfs_context_current();
4693         int error;
4694         int mntrename;
4695         int need_event;
4696         const char *oname;
4697         char *from_name = NULL, *to_name = NULL;
4698         int from_len, to_len;
4699         int holding_mntlock;
4700         mount_t locked_mp = NULL;
4701         vnode_t oparent;
4702         fse_info from_finfo, to_finfo;
4703
4704         holding_mntlock = 0;
4705 retry:
4706         fvp = tvp = NULL;
4707         fdvp = tdvp = NULL;
4708         mntrename = FALSE;
4709
4710         NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
4711
4712         if ( (error = namei(&fromnd)) )
4713                 goto out1;
4714         fdvp = fromnd.ni_dvp;
4715         fvp  = fromnd.ni_vp;
4716
4717 #if CONFIG_MACF
4718         error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
4719         if (error)
4720                 goto out1;
4721 #endif
4722
4723         NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
4724         if (fvp->v_type == VDIR)
4725                 tond.ni_cnd.cn_flags |= WILLBEDIR;
4726
4727         if ( (error = namei(&tond)) ) {
4728                 /*
4729                  * Translate error code for rename("dir1", "dir2/.").
4730                  */
4731                 if (error == EISDIR && fvp->v_type == VDIR)
4732                         error = EINVAL;
4733                 goto out1;
4734         }
4735         tdvp = tond.ni_dvp;
4736         tvp  = tond.ni_vp;
4737
4738 #if CONFIG_MACF
4739         error = mac_vnode_check_rename_to(ctx,
4740             tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
4741         if (error)
4742                 goto out1;
4743 #endif
4744
4745         if (tvp != NULL) {
4746                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
4747                         error = ENOTDIR;
4748                         goto out1;
4749                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
4750                         error = EISDIR;
4751                         goto out1;
4752                 }
4753         }
4754         if (fvp == tdvp) {
4755                 error = EINVAL;
4756                 goto out1;
4757         }
4758         /*
4759          * If the source and destination are the same (i.e. they're
4760          * links to the same vnode) and the target file system is
4761          * case sensitive, then there is nothing to do.
4762          */
4763         if (fvp == tvp) {
4764                 int pathconf_val;
4765
4766                 /*
4767                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
4768                  * then assume that this file system is case sensitive.
4769                  */
4770                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
4771                     pathconf_val != 0) {
4772                         goto out1;
4773                 }
4774         }
4775
4776         /*
4777          * Authorization.
4778          *
4779          * If tvp is a directory and not the same as fdvp, or tdvp is not
4780          * the same as fdvp, the node is moving between directories and we
4781          * need rights to remove from the old and add to the new.
4782          *
4783          * If tvp already exists and is not a directory, we need to be
4784          * allowed to delete it.
4785          *
4786          * Note that we do not inherit when renaming.
4787          *
4788          * XXX This needs to be revisited to implement the deferred-inherit bit
4789          */
4790         {
4791                 int moving = 0;
4792
4793                 error = 0;
4794                 if ((tvp != NULL) && vnode_isdir(tvp)) {
4795                         if (tvp != fdvp)
4796                                 moving = 1;
4797                 } else if (tdvp != fdvp) {
4798                         moving = 1;
4799                 }
4800                 /*
4801                  * must have delete rights to remove the old name even in
4802                  * the simple case of fdvp == tdvp.
4803                  *
4804                  * If fvp is a directory, and we are changing it's parent,
4805                  * then we also need rights to rewrite its ".." entry as well.
4806                  */
4807                 if (vnode_isdir(fvp)) {
4808                         if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
4809                                 goto auth_exit;
4810                 } else {
4811                 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
4812                         goto auth_exit;
4813                 }
4814                 if (moving) {
4815                         /* moving into tdvp or tvp, must have rights to add */
4816                         if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
4817                                  NULL,
4818                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
4819                                  ctx)) != 0)
4820                                 goto auth_exit;
4821                 } else {
4822                         /* node staying in same directory, must be allowed to add new name */
4823                         if ((error = vnode_authorize(fdvp, NULL,
4824                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4825                                 goto auth_exit;
4826                 }
4827                 /* overwriting tvp */
4828                 if ((tvp != NULL) && !vnode_isdir(tvp) &&
4829                     ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0))
4830                         goto auth_exit;
4831
4832                 /* XXX more checks? */
4833
4834 auth_exit:
4835                 /* authorization denied */
4836                 if (error != 0)
4837                         goto out1;
4838         }
4839         /*
4840          * Allow the renaming of mount points.
4841          * - target must not exist
4842          * - target must reside in the same directory as source
4843          * - union mounts cannot be renamed
4844          * - "/" cannot be renamed
4845          */
4846         if ((fvp->v_flag & VROOT) &&
4847             (fvp->v_type == VDIR) &&
4848             (tvp == NULL)  &&
4849             (fvp->v_mountedhere == NULL)  &&
4850             (fdvp == tdvp)  &&
4851             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
4852             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
4853                 vnode_t coveredvp;
4854
4855                 /* switch fvp to the covered vnode */
4856                 coveredvp = fvp->v_mount->mnt_vnodecovered;
4857                 if ( (vnode_getwithref(coveredvp)) ) {
4858                         error = ENOENT;
4859                         goto out1;
4860                 }
4861                 vnode_put(fvp);
4862
4863                 fvp = coveredvp;
4864                 mntrename = TRUE;
4865         }
4866         /*
4867          * Check for cross-device rename.
4868          */
4869         if ((fvp->v_mount != tdvp->v_mount) ||
4870             (tvp && (fvp->v_mount != tvp->v_mount))) {
4871                 error = EXDEV;
4872                 goto out1;
4873         }
4874         /*
4875          * Avoid renaming "." and "..".
4876          */
4877         if (fvp->v_type == VDIR &&
4878             ((fdvp == fvp) ||
4879              (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
4880              ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
4881                 error = EINVAL;
4882                 goto out1;
4883         }
4884         /*
4885          * The following edge case is caught here:
4886          * (to cannot be a descendent of from)
4887          *
4888          *       o fdvp
4889          *      /
4890          *     /
4891          *    o fvp
4892          *     \
4893          *      \
4894          *       o tdvp
4895          *      /
4896          *     /
4897          *    o tvp
4898          */
4899         if (tdvp->v_parent == fvp) {
4900                 error = EINVAL;
4901                 goto out1;
4902         }
4903
4904         /*
4905          * If source is the same as the destination (that is the
4906          * same inode number) then there is nothing to do...
4907          * EXCEPT if the underlying file system supports case
4908          * insensitivity and is case preserving.  In this case
4909          * the file system needs to handle the special case of
4910          * getting the same vnode as target (fvp) and source (tvp).
4911          *
4912          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
4913          * and _PC_CASE_PRESERVING can have this exception, and they need to
4914          * handle the special case of getting the same vnode as target and
4915          * source.  NOTE: Then the target is unlocked going into vnop_rename,
4916          * so not to cause locking problems. There is a single reference on tvp.
4917          *
4918          * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE
4919          * that correct behaviour then is just to remove the source (link)
4920          */
4921         if (fvp == tvp && fdvp == tdvp) {
4922                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
4923                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
4924                           fromnd.ni_cnd.cn_namelen)) {
4925                         goto out1;
4926                 }
4927         }
4928
4929         if (holding_mntlock && fvp->v_mount != locked_mp) {
4930                 /*
4931                  * we're holding a reference and lock
4932                  * on locked_mp, but it no longer matches
4933                  * what we want to do... so drop our hold
4934                  */
4935                 mount_unlock_renames(locked_mp);
4936                 mount_drop(locked_mp, 0);
4937                 holding_mntlock = 0;
4938         }
4939         if (tdvp != fdvp && fvp->v_type == VDIR) {
4940                 /*
4941                  * serialize renames that re-shape
4942                  * the tree... if holding_mntlock is
4943                  * set, then we're ready to go...
4944                  * otherwise we
4945                  * first need to drop the iocounts
4946                  * we picked up, second take the
4947                  * lock to serialize the access,
4948                  * then finally start the lookup
4949                  * process over with the lock held
4950                  */
4951                 if (!holding_mntlock) {
4952                         /*
4953                          * need to grab a reference on
4954                          * the mount point before we
4955                          * drop all the iocounts... once
4956                          * the iocounts are gone, the mount
4957                          * could follow
4958                          */
4959                         locked_mp = fvp->v_mount;
4960                         mount_ref(locked_mp, 0);
4961
4962                         /*
4963                          * nameidone has to happen before we vnode_put(tvp)
4964                          * since it may need to release the fs_nodelock on the tvp
4965                          */
4966                         nameidone(&tond);
4967
4968                         if (tvp)
4969                                 vnode_put(tvp);
4970                         vnode_put(tdvp);
4971
4972                         /*
4973                          * nameidone has to happen before we vnode_put(fdvp)
4974                          * since it may need to release the fs_nodelock on the fvp
4975                          */
4976                         nameidone(&fromnd);
4977
4978                         vnode_put(fvp);
4979                         vnode_put(fdvp);
4980
4981                         mount_lock_renames(locked_mp);
4982                         holding_mntlock = 1;
4983
4984                         goto retry;
4985                 }
4986         } else {
4987                 /*
4988                  * when we dropped the iocounts to take
4989                  * the lock, we allowed the identity of
4990                  * the various vnodes to change... if they did,
4991                  * we may no longer be dealing with a rename
4992                  * that reshapes the tree... once we're holding
4993                  * the iocounts, the vnodes can't change type
4994                  * so we're free to drop the lock at this point
4995                  * and continue on
4996                  */
4997                 if (holding_mntlock) {
4998                         mount_unlock_renames(locked_mp);
4999                         mount_drop(locked_mp, 0);
5000                         holding_mntlock = 0;
5001                 }
5002         }
5003         // save these off so we can later verify that fvp is the same
5004         oname   = fvp->v_name;
5005         oparent = fvp->v_parent;
5006
5007 #if CONFIG_FSE
5008         need_event = need_fsevent(FSE_RENAME, fvp);
5009         if (need_event) {
5010                 get_fse_info(fvp, &from_finfo, ctx);
5011
5012                 if (tvp) {
5013                         get_fse_info(tvp, &to_finfo, ctx);
5014                 }
5015         }
5016 #else
5017         need_event = 0;
5018 #endif /* CONFIG_FSE */
5019
5020         if (need_event || kauth_authorize_fileop_has_listeners()) {
5021                 GET_PATH(from_name);
5022                 if (from_name == NULL) {
5023                         error = ENOMEM;
5024                         goto out1;
5025                 }
5026                 from_len = MAXPATHLEN;
5027                 vn_getpath(fdvp, from_name, &from_len);
5028                 if ((from_len + 1 + fromnd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5029                     if (from_len > 2) {
5030                         from_name[from_len-1] = '/';
5031                     } else {
5032                         from_len--;
5033                     }
5034                     strlcpy(&from_name[from_len], fromnd.ni_cnd.cn_nameptr, MAXPATHLEN-from_len);
5035                     from_len += fromnd.ni_cnd.cn_namelen + 1;
5036                     from_name[from_len] = '\0';
5037                 }
5038
5039                 GET_PATH(to_name);
5040                 if (to_name == NULL) {
5041                         error = ENOMEM;
5042                         goto out1;
5043                 }
5044
5045                 to_len = MAXPATHLEN;
5046                 vn_getpath(tdvp, to_name, &to_len);
5047                 // if the path is not just "/", then append a "/"
5048                 if ((to_len + 1 + tond.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5049                     if (to_len > 2) {
5050                         to_name[to_len-1] = '/';
5051                     } else {
5052                         to_len--;
5053                     }
5054                     strlcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr, MAXPATHLEN-to_len);
5055                     to_len += tond.ni_cnd.cn_namelen + 1;
5056                     to_name[to_len] = '\0';
5057                 }
5058         }
5059
5060         error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5061                             tdvp, tvp, &tond.ni_cnd,
5062                             ctx);
5063
5064         if (holding_mntlock) {
5065                 /*
5066                  * we can drop our serialization
5067                  * lock now
5068                  */
5069                 mount_unlock_renames(locked_mp);
5070                 mount_drop(locked_mp, 0);
5071                 holding_mntlock = 0;
5072         }
5073         if (error) {
5074
5075                 goto out1;
5076         }
5077
5078         /* call out to allow 3rd party notification of rename.
5079          * Ignore result of kauth_authorize_fileop call.
5080          */
5081         kauth_authorize_fileop(vfs_context_ucred(ctx),
5082                         KAUTH_FILEOP_RENAME,
5083                         (uintptr_t)from_name, (uintptr_t)to_name);
5084
5085 #if CONFIG_FSE
5086         if (from_name != NULL && to_name != NULL) {
5087                 if (tvp) {
5088                         add_fsevent(FSE_RENAME, ctx,
5089                                     FSE_ARG_STRING, from_len, from_name,
5090                                     FSE_ARG_FINFO, &from_finfo,
5091                                     FSE_ARG_STRING, to_len, to_name,
5092                                     FSE_ARG_FINFO, &to_finfo,
5093                                     FSE_ARG_DONE);
5094                 } else {
5095                         add_fsevent(FSE_RENAME, ctx,
5096                                     FSE_ARG_STRING, from_len, from_name,
5097                                     FSE_ARG_FINFO, &from_finfo,
5098                                     FSE_ARG_STRING, to_len, to_name,
5099                                     FSE_ARG_DONE);
5100                 }
5101         }
5102 #endif /* CONFIG_FSE */
5103
5104         /*
5105          * update filesystem's mount point data
5106          */
5107         if (mntrename) {
5108                 char *cp, *pathend, *mpname;
5109                 char * tobuf;
5110                 struct mount *mp;
5111                 int maxlen;
5112                 size_t len = 0;
5113
5114                 mp = fvp->v_mountedhere;
5115
5116                 if (vfs_busy(mp, LK_NOWAIT)) {
5117                         error = EBUSY;
5118                         goto out1;
5119                 }
5120                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5121
5122                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5123                 if (!error) {
5124                         /* find current mount point prefix */
5125                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
5126                         for (cp = pathend; *cp != '\0'; ++cp) {
5127                                 if (*cp == '/')
5128                                         pathend = cp + 1;
5129                         }
5130                         /* find last component of target name */
5131                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
5132                                 if (*cp == '/')
5133                                         mpname = cp + 1;
5134                         }
5135                         /* append name to prefix */
5136                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
5137                         bzero(pathend, maxlen);
5138                         strlcpy(pathend, mpname, maxlen);
5139                 }
5140                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
5141
5142                 vfs_unbusy(mp);
5143         }
5144         /*
5145          * fix up name & parent pointers.  note that we first
5146          * check that fvp has the same name/parent pointers it
5147          * had before the rename call... this is a 'weak' check
5148          * at best...
5149          */
5150         if (oname == fvp->v_name && oparent == fvp->v_parent) {
5151                 int update_flags;
5152
5153                 update_flags = VNODE_UPDATE_NAME;
5154
5155                 if (fdvp != tdvp)
5156                         update_flags |= VNODE_UPDATE_PARENT;
5157
5158                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
5159         }
5160 out1:
5161         if (to_name != NULL)
5162                 RELEASE_PATH(to_name);
5163         if (from_name != NULL)
5164                 RELEASE_PATH(from_name);
5165
5166         if (holding_mntlock) {
5167                 mount_unlock_renames(locked_mp);
5168                 mount_drop(locked_mp, 0);
5169         }
5170         if (tdvp) {
5171                 /*
5172                  * nameidone has to happen before we vnode_put(tdvp)
5173                  * since it may need to release the fs_nodelock on the tdvp
5174                  */
5175                 nameidone(&tond);
5176
5177                 if (tvp)
5178                         vnode_put(tvp);
5179                 vnode_put(tdvp);
5180         }
5181         if (fdvp) {
5182                 /*
5183                  * nameidone has to happen before we vnode_put(fdvp)
5184                  * since it may need to release the fs_nodelock on the fdvp
5185                  */
5186                 nameidone(&fromnd);
5187
5188                 if (fvp)
5189                         vnode_put(fvp);
5190                 vnode_put(fdvp);
5191         }
5192         return (error);
5193 }
5194
5195 /*
5196  * Make a directory file.
5197  *
5198  * Returns:     0                       Success
5199  *              EEXIST
5200  *      namei:???
5201  *      vnode_authorize:???
5202  *      vn_create:???
5203  */
5204 /* ARGSUSED */
5205 static int
5206 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5207 {
5208         vnode_t vp, dvp;
5209         int error;
5210         int update_flags = 0;
5211         struct nameidata nd;
5212
5213         AUDIT_ARG(mode, vap->va_mode);
5214         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
5215                 UIO_USERSPACE, path, ctx);
5216         nd.ni_cnd.cn_flags |= WILLBEDIR;
5217         error = namei(&nd);
5218         if (error)
5219                 return (error);
5220         dvp = nd.ni_dvp;
5221         vp = nd.ni_vp;
5222
5223         if (vp != NULL) {
5224                 error = EEXIST;
5225                 goto out;
5226         }
5227
5228         VATTR_SET(vap, va_type, VDIR);
5229
5230 #if CONFIG_MACF
5231         error = mac_vnode_check_create(ctx,
5232             nd.ni_dvp, &nd.ni_cnd, vap);
5233         if (error)
5234                 goto out;
5235 #endif
5236
5237         /* authorize addition of a directory to the parent */
5238         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5239                 goto out;
5240
5241
5242         /* make the directory */
5243         if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
5244                 goto out;
5245
5246         // Make sure the name & parent pointers are hooked up
5247         if (vp->v_name == NULL)
5248                 update_flags |= VNODE_UPDATE_NAME;
5249         if (vp->v_parent == NULLVP)
5250                 update_flags |= VNODE_UPDATE_PARENT;
5251
5252         if (update_flags)
5253                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5254
5255 #if CONFIG_FSE
5256         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
5257 #endif
5258
5259 out:
5260         /*
5261          * nameidone has to happen before we vnode_put(dvp)
5262          * since it may need to release the fs_nodelock on the dvp
5263          */
5264         nameidone(&nd);
5265
5266         if (vp)
5267                 vnode_put(vp);
5268         vnode_put(dvp);
5269
5270         return (error);
5271 }
5272
5273
5274 int
5275 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *retval)
5276 {
5277         int ciferror;
5278         kauth_filesec_t xsecdst;
5279         struct vnode_attr va;
5280
5281         xsecdst = NULL;
5282         if ((uap->xsecurity != USER_ADDR_NULL) &&
5283             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
5284                 return ciferror;
5285
5286         VATTR_INIT(&va);
5287         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5288         if (xsecdst != NULL)
5289                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5290
5291         ciferror = mkdir1(vfs_context_current(), uap->path, &va);
5292         if (xsecdst != NULL)
5293                 kauth_filesec_free(xsecdst);
5294         return ciferror;
5295 }
5296
5297 int
5298 mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval)
5299 {
5300         struct vnode_attr va;
5301
5302         VATTR_INIT(&va);
5303         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5304
5305         return(mkdir1(vfs_context_current(), uap->path, &va));
5306 }
5307
5308 /*
5309  * Remove a directory file.
5310  */
5311 /* ARGSUSED */
5312 int
5313 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval)
5314 {
5315         vnode_t vp, dvp;
5316         int error;
5317         struct nameidata nd;
5318         vfs_context_t ctx = vfs_context_current();
5319
5320         int restart_flag, oldvp_id = -1;
5321
5322         /*
5323          * This loop exists to restart rmdir in the unlikely case that two
5324          * processes are simultaneously trying to remove the same directory
5325          * containing orphaned appleDouble files.
5326          */
5327         do {
5328                 restart_flag = 0;
5329
5330                 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
5331                                 UIO_USERSPACE, uap->path, ctx);
5332                 error = namei(&nd);
5333                 if (error)
5334                         return (error);
5335
5336                 dvp = nd.ni_dvp;
5337                 vp = nd.ni_vp;
5338
5339
5340                 /*
5341                  * If being restarted check if the new vp
5342                  * still has the same v_id.
5343                  */
5344                 if (oldvp_id != -1 && oldvp_id != vp->v_id) {
5345                         error = ENOENT;
5346                         goto out;
5347                 }
5348
5349                 if (vp->v_type != VDIR) {
5350                         /*
5351                          * rmdir only deals with directories
5352                          */
5353                         error = ENOTDIR;
5354                 } else if (dvp == vp) {
5355                         /*
5356                          * No rmdir "." please.
5357                          */
5358                         error = EINVAL;
5359                 } else if (vp->v_flag & VROOT) {
5360                         /*
5361                          * The root of a mounted filesystem cannot be deleted.
5362                          */
5363                         error = EBUSY;
5364                 } else {
5365 #if CONFIG_MACF
5366                         error = mac_vnode_check_unlink(ctx, dvp,
5367                                         vp, &nd.ni_cnd);
5368                         if (!error)
5369 #endif
5370                                 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
5371                 }
5372                 if (!error) {
5373                         char     *path = NULL;
5374                         int       len;
5375                         fse_info  finfo;
5376                         int has_listeners = 0;
5377                         int need_event = 0;
5378
5379 #if CONFIG_FSE
5380                         need_event = need_fsevent(FSE_DELETE, dvp);
5381                         if (need_event) {
5382                                 get_fse_info(vp, &finfo, ctx);
5383                         }
5384 #endif
5385                         has_listeners = kauth_authorize_fileop_has_listeners();
5386                         if (need_event || has_listeners) {
5387                                 GET_PATH(path);
5388                                 if (path == NULL) {
5389                                         error = ENOMEM;
5390                                         goto out;
5391                                 }
5392                                 len = MAXPATHLEN;
5393                                 vn_getpath(vp, path, &len);
5394                         }
5395
5396                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5397
5398                         /*
5399                          * Special case to remove orphaned AppleDouble
5400                          * files. I don't like putting this in the kernel,
5401                          * but carbon does not like putting this in carbon either,
5402                          * so here we are.
5403                          */
5404                         if (error == ENOTEMPTY) {
5405                                 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
5406                                 if (error == EBUSY) {
5407                                         oldvp_id = vp->v_id;
5408                                         goto out;
5409                                 }
5410
5411
5412                                 /*
5413                                  * Assuming everything went well, we will try the RMDIR again
5414                                  */
5415                                 if (!error)
5416                                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5417                         }
5418
5419                         /*
5420                          * Call out to allow 3rd party notification of delete.
5421                          * Ignore result of kauth_authorize_fileop call.
5422                          */
5423                         if (!error) {
5424                                 if (has_listeners) {
5425                                         kauth_authorize_fileop(vfs_context_ucred(ctx),
5426                                                         KAUTH_FILEOP_DELETE,
5427                                                         (uintptr_t)vp,
5428                                                         (uintptr_t)path);
5429                                 }
5430
5431                                 if (vp->v_flag & VISHARDLINK) {
5432                                     // see the comment in unlink1() about why we update
5433                                     // the parent of a hard link when it is removed
5434                                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5435                                 }
5436
5437 #if CONFIG_FSE
5438                                 if (need_event) {
5439                                         add_fsevent(FSE_DELETE, ctx,
5440                                                         FSE_ARG_STRING, len, path,
5441                                                         FSE_ARG_FINFO, &finfo,
5442                                                         FSE_ARG_DONE);
5443                                 }
5444 #endif
5445                         }
5446                         if (path != NULL)
5447                                 RELEASE_PATH(path);
5448                 }
5449
5450 out:
5451                 /*
5452                  * nameidone has to happen before we vnode_put(dvp)
5453                  * since it may need to release the fs_nodelock on the dvp
5454                  */
5455                 nameidone(&nd);
5456
5457                 vnode_put(dvp);
5458                 vnode_put(vp);
5459
5460                 if (restart_flag == 0) {
5461                         wakeup_one((caddr_t)vp);
5462                         return (error);
5463                 }
5464                 tsleep(vp, PVFS, "rm AD", 1);
5465
5466         } while (restart_flag != 0);
5467
5468         return (error);
5469
5470 }
5471
5472 /* Get direntry length padded to 8 byte alignment */
5473 #define DIRENT64_LEN(namlen) \
5474         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
5475
5476 static errno_t
5477 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
5478                 int *numdirent, vfs_context_t ctxp)
5479 {
5480         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
5481         if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
5482                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
5483         } else {
5484                 size_t bufsize;
5485                 void * bufptr;
5486                 uio_t auio;
5487                 struct direntry entry64;
5488                 struct dirent *dep;
5489                 int bytesread;
5490                 int error;
5491
5492                 /*
5493                  * Our kernel buffer needs to be smaller since re-packing
5494                  * will expand each dirent.  The worse case (when the name
5495                  * length is 3) corresponds to a struct direntry size of 32
5496                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
5497                  * (4-byte aligned).  So having a buffer that is 3/8 the size
5498                  * will prevent us from reading more than we can pack.
5499                  *
5500                  * Since this buffer is wired memory, we will limit the
5501                  * buffer size to a maximum of 32K. We would really like to
5502                  * use 32K in the MIN(), but we use magic number 87371 to
5503                  * prevent uio_resid() * 3 / 8 from overflowing.
5504                  */
5505                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
5506                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
5507
5508                 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
5509                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
5510                 auio->uio_offset = uio->uio_offset;
5511
5512                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
5513
5514                 dep = (struct dirent *)bufptr;
5515                 bytesread = bufsize - uio_resid(auio);
5516
5517                 /*
5518                  * Convert all the entries and copy them out to user's buffer.
5519                  */
5520                 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
5521                         /* Convert a dirent to a dirent64. */
5522                         entry64.d_ino = dep->d_ino;
5523                         entry64.d_seekoff = 0;
5524                         entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
5525                         entry64.d_namlen = dep->d_namlen;
5526                         entry64.d_type = dep->d_type;
5527                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
5528
5529                         /* Move to next entry. */
5530                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
5531
5532                         /* Copy entry64 to user's buffer. */
5533                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
5534                 }
5535
5536                 /* Update the real offset using the offset we got from VNOP_READDIR. */
5537                 if (error == 0) {
5538                         uio->uio_offset = auio->uio_offset;
5539                 }
5540                 uio_free(auio);
5541                 FREE(bufptr, M_TEMP);
5542                 return (error);
5543         }
5544 }
5545
5546 /*
5547  * Read a block of directory entries in a file system independent format.
5548  */
5549 static int
5550 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
5551                      off_t *offset, int flags)
5552 {
5553         vnode_t vp;
5554         struct vfs_context context = *vfs_context_current();    /* local copy */
5555         struct fileproc *fp;
5556         uio_t auio;
5557         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5558         off_t loff;
5559         int error, eofflag, numdirent;
5560         char uio_buf[ UIO_SIZEOF(1) ];
5561
5562         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
5563         if (error) {
5564                 return (error);
5565         }
5566         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5567                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5568                 error = EBADF;
5569                 goto out;
5570         }
5571
5572 #if CONFIG_MACF
5573         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
5574         if (error)
5575                 goto out;
5576 #endif
5577         if ( (error = vnode_getwithref(vp)) ) {
5578                 goto out;
5579         }
5580         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5581
5582 unionread:
5583         if (vp->v_type != VDIR) {
5584                 (void)vnode_put(vp);
5585                 error = EINVAL;
5586                 goto out;
5587         }
5588
5589 #if CONFIG_MACF
5590         error = mac_vnode_check_readdir(&context, vp);
5591         if (error != 0) {
5592                 (void)vnode_put(vp);
5593                 goto out;
5594         }
5595 #endif /* MAC */
5596
5597         loff = fp->f_fglob->fg_offset;
5598         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
5599         uio_addiov(auio, bufp, bufsize);
5600
5601         if (flags & VNODE_READDIR_EXTENDED) {
5602                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
5603                 fp->f_fglob->fg_offset = uio_offset(auio);
5604         } else {
5605                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
5606                 fp->f_fglob->fg_offset = uio_offset(auio);
5607         }
5608         if (error) {
5609                 (void)vnode_put(vp);
5610                 goto out;
5611         }
5612
5613         if ((user_ssize_t)bufsize == uio_resid(auio)){
5614                 if (union_dircheckp) {
5615                         error = union_dircheckp(&vp, fp, &context);
5616                         if (error == -1)
5617                                 goto unionread;
5618                         if (error)
5619                                 goto out;
5620                 }
5621
5622                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
5623                         struct vnode *tvp = vp;
5624                         vp = vp->v_mount->mnt_vnodecovered;
5625                         vnode_getwithref(vp);
5626                         vnode_ref(vp);
5627                         fp->f_fglob->fg_data = (caddr_t) vp;
5628                         fp->f_fglob->fg_offset = 0;
5629                         vnode_rele(tvp);
5630                         vnode_put(tvp);
5631                         goto unionread;
5632                 }
5633         }
5634
5635         vnode_put(vp);
5636         if (offset) {
5637                 *offset = loff;
5638         }
5639         // LP64todo - fix this
5640         *bytesread = bufsize - uio_resid(auio);
5641 out:
5642         file_drop(fd);
5643         return (error);
5644 }
5645
5646
5647 int
5648 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_t *retval)
5649 {
5650         off_t offset;
5651         long loff;
5652         ssize_t bytesread;
5653         int error;
5654
5655         AUDIT_ARG(fd, uap->fd);
5656         error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
5657
5658         if (error == 0) {
5659                 loff = (long)offset;
5660                 error = copyout((caddr_t)&loff, uap->basep, sizeof(long));
5661                 *retval = bytesread;
5662         }
5663         return (error);
5664 }
5665
5666 int
5667 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
5668 {
5669         off_t offset;
5670         ssize_t bytesread;
5671         int error;
5672
5673         AUDIT_ARG(fd, uap->fd);
5674         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
5675
5676         if (error == 0) {
5677                 *retval = bytesread;
5678                 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
5679         }
5680         return (error);
5681 }
5682
5683
5684 /*
5685  * Set the mode mask for creation of filesystem nodes.
5686  */
5687 #warning XXX implement xsecurity
5688
5689 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
5690 static int
5691 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval)
5692 {
5693         struct filedesc *fdp;
5694
5695         AUDIT_ARG(mask, newmask);
5696         proc_fdlock(p);
5697         fdp = p->p_fd;
5698         *retval = fdp->fd_cmask;
5699         fdp->fd_cmask = newmask & ALLPERMS;
5700         proc_fdunlock(p);
5701         return (0);
5702 }
5703
5704
5705 int
5706 umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval)
5707 {
5708         int ciferror;
5709         kauth_filesec_t xsecdst;
5710
5711         xsecdst = KAUTH_FILESEC_NONE;
5712         if (uap->xsecurity != USER_ADDR_NULL) {
5713                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5714                         return ciferror;
5715         } else {
5716                 xsecdst = KAUTH_FILESEC_NONE;
5717         }
5718
5719         ciferror = umask1(p, uap->newmask, xsecdst, retval);
5720
5721         if (xsecdst != KAUTH_FILESEC_NONE)
5722                 kauth_filesec_free(xsecdst);
5723         return ciferror;
5724 }
5725
5726 int
5727 umask(proc_t p, struct umask_args *uap, register_t *retval)
5728 {
5729         return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
5730 }
5731
5732 /*
5733  * Void all references to file by ripping underlying filesystem
5734  * away from vnode.
5735  */
5736 /* ARGSUSED */
5737 int
5738 revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval)
5739 {
5740         vnode_t vp;
5741         struct vnode_attr va;
5742         vfs_context_t ctx = vfs_context_current();
5743         int error;
5744         struct nameidata nd;
5745
5746         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5747                 UIO_USERSPACE, uap->path, ctx);
5748         error = namei(&nd);
5749         if (error)
5750                 return (error);
5751         vp = nd.ni_vp;
5752
5753         nameidone(&nd);
5754
5755 #if CONFIG_MACF
5756         error = mac_vnode_check_revoke(ctx, vp);
5757         if (error)
5758                 goto out;
5759 #endif
5760
5761         VATTR_INIT(&va);
5762         VATTR_WANTED(&va, va_uid);
5763         if ((error = vnode_getattr(vp, &va, ctx)))
5764                 goto out;
5765         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
5766             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
5767                 goto out;
5768         if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
5769                 VNOP_REVOKE(vp, REVOKEALL, ctx);
5770 out:
5771         vnode_put(vp);
5772         return (error);
5773 }
5774
5775
/*
 *  HFS/HFS Plus SPECIFIC SYSTEM CALLS
 *  The following system calls are designed to support features
 *  which are specific to the HFS & HFS Plus volume formats
 */
5781
5782 #ifdef __APPLE_API_OBSOLETE
5783
5784 /************************************************/
5785 /* *** Following calls will be deleted soon *** */
5786 /************************************************/
5787
5788 /*
5789  * Make a complex file.  A complex file is one with multiple forks (data streams)
5790  */
5791 /* ARGSUSED */
5792 int
5793 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused register_t *retval)
5794 {
5795         return (ENOTSUP);
5796 }
5797
5798 /*
5799  * Extended stat call which returns volumeid and vnodeid as well as other info
5800  */
5801 /* ARGSUSED */
5802 int
5803 statv(__unused proc_t p,
5804           __unused struct statv_args *uap,
5805           __unused register_t *retval)
5806 {
5807         return (ENOTSUP);       /*  We'll just return an error for now */
5808
5809 } /* end of statv system call */
5810
5811 /*
5812 * Extended lstat call which returns volumeid and vnodeid as well as other info
5813 */
5814 /* ARGSUSED */
5815 int
5816 lstatv(__unused proc_t p,
5817            __unused struct lstatv_args *uap,
5818            __unused register_t *retval)
5819 {
5820        return (ENOTSUP);        /*  We'll just return an error for now */
5821 } /* end of lstatv system call */
5822
5823 /*
5824 * Extended fstat call which returns volumeid and vnodeid as well as other info
5825 */
5826 /* ARGSUSED */
5827 int
5828 fstatv(__unused proc_t p,
5829            __unused struct fstatv_args *uap,
5830            __unused register_t *retval)
5831 {
5832        return (ENOTSUP);        /*  We'll just return an error for now */
5833 } /* end of fstatv system call */
5834
5835
5836 /************************************************/
5837 /* *** Preceding calls will be deleted soon *** */
5838 /************************************************/
5839
5840 #endif /* __APPLE_API_OBSOLETE */
5841
5842 /*
5843 * Obtain attribute information on objects in a directory while enumerating
5844 * the directory.  This call does not yet support union mounted directories.
5845 * TO DO
5846 *  1.union mounted directories.
5847 */
5848
5849 /* ARGSUSED */
5850 int
5851 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *retval)
5852 {
5853         vnode_t vp;
5854         struct fileproc *fp;
5855         uio_t auio = NULL;
5856         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5857         uint32_t count;
5858         uint32_t newstate;
5859         int error, eofflag;
5860         uint32_t loff;
5861         struct attrlist attributelist;
5862         vfs_context_t ctx = vfs_context_current();
5863         int fd = uap->fd;
5864         char uio_buf[ UIO_SIZEOF(1) ];
5865         kauth_action_t action;
5866
5867         AUDIT_ARG(fd, fd);
5868
5869         /* Get the attributes into kernel space */
5870         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
5871                 return(error);
5872         }
5873         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
5874                 return(error);
5875         }
5876         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5877                 return (error);
5878         }
5879         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5880                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5881                 error = EBADF;
5882                 goto out;
5883         }
5884
5885
5886 #if CONFIG_MACF
5887         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5888             fp->f_fglob);
5889         if (error)
5890                 goto out;
5891 #endif
5892
5893
5894         if ( (error = vnode_getwithref(vp)) )
5895                 goto out;
5896
5897         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5898
5899         if (vp->v_type != VDIR) {
5900                 (void)vnode_put(vp);
5901                 error = EINVAL;
5902                 goto out;
5903         }
5904
5905 #if CONFIG_MACF
5906         error = mac_vnode_check_readdir(ctx, vp);
5907         if (error != 0) {
5908                 (void)vnode_put(vp);
5909                 goto out;
5910         }
5911 #endif /* MAC */
5912
5913         /* set up the uio structure which will contain the users return buffer */
5914         loff = fp->f_fglob->fg_offset;
5915         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
5916             &uio_buf[0], sizeof(uio_buf));
5917         uio_addiov(auio, uap->buffer, uap->buffersize);
5918
5919         /*
5920          * If the only item requested is file names, we can let that past with
5921          * just LIST_DIRECTORY.  If they want any other attributes, that means
5922          * they need SEARCH as well.
5923          */
5924         action = KAUTH_VNODE_LIST_DIRECTORY;
5925         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
5926             attributelist.fileattr || attributelist.dirattr)
5927                 action |= KAUTH_VNODE_SEARCH;
5928
5929         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
5930                 u_long ulcount = count;
5931
5932                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
5933                                          count,
5934                                          uap->options, (unsigned long *)&newstate, &eofflag,
5935                                          &ulcount, ctx);
5936                 if (!error)
5937                         count = ulcount;
5938         }
5939         (void)vnode_put(vp);
5940
5941         if (error)
5942                 goto out;
5943         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
5944
5945         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
5946                 goto out;
5947         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
5948                 goto out;
5949         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
5950                 goto out;
5951
5952         *retval = eofflag;  /* similar to getdirentries */
5953         error = 0;
5954 out:
5955         file_drop(fd);
5956         return (error); /* return error earlier, an retval of 0 or 1 now */
5957
5958 } /* end of getdirentryattr system call */
5959
5960 /*
5961 * Exchange data between two files
5962 */
5963
5964 /* ARGSUSED */
5965 int
5966 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused register_t *retval)
5967 {
5968
5969         struct nameidata fnd, snd;
5970         vfs_context_t ctx = vfs_context_current();
5971         vnode_t fvp;
5972         vnode_t svp;
5973         int error;
5974         u_long nameiflags;
5975         char *fpath = NULL;
5976         char *spath = NULL;
5977         int   flen, slen;
5978         fse_info f_finfo, s_finfo;
5979
5980         nameiflags = 0;
5981         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
5982
5983     NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
5984                 UIO_USERSPACE, uap->path1, ctx);
5985
5986     error = namei(&fnd);
5987     if (error)
5988         goto out2;
5989
5990         nameidone(&fnd);
5991         fvp = fnd.ni_vp;
5992
5993     NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
5994                 UIO_USERSPACE, uap->path2, ctx);
5995
5996     error = namei(&snd);
5997     if (error) {
5998                 vnode_put(fvp);
5999                 goto out2;
6000     }
6001         nameidone(&snd);
6002         svp = snd.ni_vp;
6003
6004         /*
6005          * if the files are the same, return an inval error
6006          */
6007         if (svp == fvp) {
6008                 error = EINVAL;
6009                 goto out;
6010         }
6011
6012         /*
6013          * if the files are on different volumes, return an error
6014          */
6015         if (svp->v_mount != fvp->v_mount) {
6016                 error = EXDEV;
6017                 goto out;
6018         }
6019
6020 #if CONFIG_MACF
6021         error = mac_vnode_check_exchangedata(ctx,
6022             fvp, svp);
6023         if (error)
6024                 goto out;
6025 #endif
6026         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
6027             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
6028                 goto out;
6029
6030         if (
6031 #if CONFIG_FSE
6032         need_fsevent(FSE_EXCHANGE, fvp) ||
6033 #endif
6034         kauth_authorize_fileop_has_listeners()) {
6035                 GET_PATH(fpath);
6036                 GET_PATH(spath);
6037                 if (fpath == NULL || spath == NULL) {
6038                         error = ENOMEM;
6039                         goto out;
6040                 }
6041                 flen = MAXPATHLEN;
6042                 slen = MAXPATHLEN;
6043                 if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') {
6044                         printf("exchange: vn_getpath(fvp=%p) failed <<%s>>\n",
6045                                fvp, fpath);
6046                 }
6047                 if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') {
6048                         printf("exchange: vn_getpath(svp=%p) failed <<%s>>\n",
6049                                svp, spath);
6050                 }
6051 #if CONFIG_FSE
6052                 get_fse_info(fvp, &f_finfo, ctx);
6053                 get_fse_info(svp, &s_finfo, ctx);
6054 #endif
6055         }
6056         /* Ok, make the call */
6057         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6058
6059         if (error == 0) {
6060             const char *tmpname;
6061
6062             if (fpath != NULL && spath != NULL) {
6063                     /* call out to allow 3rd party notification of exchangedata.
6064                      * Ignore result of kauth_authorize_fileop call.
6065                      */
6066                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
6067                                            (uintptr_t)fpath, (uintptr_t)spath);
6068             }
6069             name_cache_lock();
6070
6071             tmpname     = fvp->v_name;
6072             fvp->v_name = svp->v_name;
6073             svp->v_name = tmpname;
6074
6075             if (fvp->v_parent != svp->v_parent) {
6076                 vnode_t tmp;
6077
6078                 tmp           = fvp->v_parent;
6079                 fvp->v_parent = svp->v_parent;
6080                 svp->v_parent = tmp;
6081             }
6082             name_cache_unlock();
6083
6084 #if CONFIG_FSE
6085             if (fpath != NULL && spath != NULL) {
6086                     add_fsevent(FSE_EXCHANGE, ctx,
6087                                 FSE_ARG_STRING, flen, fpath,
6088                                 FSE_ARG_FINFO, &f_finfo,
6089                                 FSE_ARG_STRING, slen, spath,
6090                                 FSE_ARG_FINFO, &s_finfo,
6091                                 FSE_ARG_DONE);
6092             }
6093 #endif
6094         }
6095
6096 out:
6097         if (fpath != NULL)
6098                 RELEASE_PATH(fpath);
6099         if (spath != NULL)
6100                 RELEASE_PATH(spath);
6101         vnode_put(svp);
6102         vnode_put(fvp);
6103 out2:
6104         return (error);
6105 }
6106
6107
6108 /* ARGSUSED */
6109
6110 int
6111 searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval)
6112 {
6113         vnode_t vp;
6114         int error=0;
6115         int fserror = 0;
6116         struct nameidata nd;
6117         struct user_fssearchblock searchblock;
6118         struct searchstate *state;
6119         struct attrlist *returnattrs;
6120         void *searchparams1,*searchparams2;
6121         uio_t auio = NULL;
6122         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6123         u_long nummatches;
6124         int mallocsize;
6125         u_long nameiflags;
6126         vfs_context_t ctx = vfs_context_current();
6127         char uio_buf[ UIO_SIZEOF(1) ];
6128
6129         /* Start by copying in fsearchblock paramater list */
6130     if (IS_64BIT_PROCESS(p)) {
6131        error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
6132     }
6133     else {
6134         struct fssearchblock tmp_searchblock;
6135         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
6136         // munge into 64-bit version
6137         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
6138         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
6139         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
6140         searchblock.maxmatches = tmp_searchblock.maxmatches;
6141         searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec;
6142         searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec;
6143         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
6144         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
6145         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
6146         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
6147         searchblock.searchattrs = tmp_searchblock.searchattrs;
6148     }
6149         if (error)
6150                 return(error);
6151
6152         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
6153          */
6154         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
6155                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
6156                 return(EINVAL);
6157
6158         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
6159         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
6160         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
6161         /* block.                                                                                             */
6162
6163         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
6164                       sizeof(struct attrlist) + sizeof(struct searchstate);
6165
6166         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
6167
6168         /* Now set up the various pointers to the correct place in our newly allocated memory */
6169
6170         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
6171         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
6172         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
6173
6174         /* Now copy in the stuff given our local variables. */
6175
6176         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
6177                 goto freeandexit;
6178
6179         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
6180                 goto freeandexit;
6181
6182         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
6183                 goto freeandexit;
6184
6185         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
6186                 goto freeandexit;
6187
6188         /* set up the uio structure which will contain the users return buffer */
6189
6190         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
6191                                                                   &uio_buf[0], sizeof(uio_buf));
6192     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
6193
6194         nameiflags = 0;
6195         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6196         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
6197                 UIO_USERSPACE, uap->path, ctx);
6198
6199         error = namei(&nd);
6200         if (error)
6201                 goto freeandexit;
6202
6203         nameidone(&nd);
6204         vp = nd.ni_vp;
6205
6206
6207         /*
6208          * If searchblock.maxmatches == 0, then skip the search. This has happened
6209          * before and sometimes the underlyning code doesnt deal with it well.
6210          */
6211          if (searchblock.maxmatches == 0) {
6212                 nummatches = 0;
6213                 goto saveandexit;
6214          }
6215
6216         /*
6217            Allright, we have everything we need, so lets make that call.
6218
6219            We keep special track of the return value from the file system:
6220            EAGAIN is an acceptable error condition that shouldn't keep us
6221            from copying out any results...
6222          */
6223
6224         fserror = VNOP_SEARCHFS(vp,
6225                                                         searchparams1,
6226                                                         searchparams2,
6227                                                         &searchblock.searchattrs,
6228                                                         searchblock.maxmatches,
6229                                                         &searchblock.timelimit,
6230                                                         returnattrs,
6231                                                         &nummatches,
6232                                                         uap->scriptcode,
6233                                                         uap->options,
6234                                                         auio,
6235                                                         state,
6236                                                         ctx);
6237
6238 saveandexit:
6239
6240         vnode_put(vp);
6241
6242         /* Now copy out the stuff that needs copying out. That means the number of matches, the
6243            search state.  Everything was already put into he return buffer by the vop call. */
6244
6245         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
6246                 goto freeandexit;
6247
6248     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6249                 goto freeandexit;
6250
6251         error = fserror;
6252
6253 freeandexit:
6254
6255         FREE(searchparams1,M_TEMP);
6256
6257         return(error);
6258
6259
6260 } /* end of searchfs system call */
6261
6262
6263 /*
6264  * Make a filesystem-specific control call:
6265  */
6266 /* ARGSUSED */
6267 int
6268 fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval)
6269 {
6270         int error;
6271         boolean_t is64bit;
6272         struct nameidata nd;
6273         u_long nameiflags;
6274         u_long cmd = uap->cmd;
6275         u_int size;
6276 #define STK_PARAMS 128
6277         char stkbuf[STK_PARAMS];
6278         caddr_t data, memp;
6279         vfs_context_t ctx = vfs_context_current();
6280
6281         size = IOCPARM_LEN(cmd);
6282         if (size > IOCPARM_MAX) return (EINVAL);
6283
6284     is64bit = proc_is64bit(p);
6285
6286         memp = NULL;
6287         if (size > sizeof (stkbuf)) {
6288                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
6289                 data = memp;
6290         } else {
6291                 data = &stkbuf[0];
6292         };
6293
6294         if (cmd & IOC_IN) {
6295                 if (size) {
6296                         error = copyin(uap->data, data, size);
6297                         if (error) goto FSCtl_Exit;
6298                 } else {
6299                     if (is64bit) {
6300                         *(user_addr_t *)data = uap->data;
6301                     }
6302                     else {
6303                         *(uint32_t *)data = (uint32_t)uap->data;
6304                     }
6305                 };
6306         } else if ((cmd & IOC_OUT) && size) {
6307                 /*
6308                  * Zero the buffer so the user always
6309                  * gets back something deterministic.
6310                  */
6311                 bzero(data, size);
6312         } else if (cmd & IOC_VOID) {
6313         if (is64bit) {
6314             *(user_addr_t *)data = uap->data;
6315         }
6316         else {
6317             *(uint32_t *)data = (uint32_t)uap->data;
6318         }
6319         }
6320
6321         /* Get the vnode for the file we are getting info on:  */
6322         nameiflags = 0;
6323         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6324         NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx);
6325         if ((error = namei(&nd))) goto FSCtl_Exit;
6326
6327 #if CONFIG_MACF
6328         error = mac_mount_check_fsctl(ctx, vnode_mount(nd.ni_vp), cmd);
6329         if (error) {
6330                 vnode_put(nd.ni_vp);
6331                 nameidone(&nd);
6332                 goto FSCtl_Exit;
6333         }
6334 #endif
6335
6336         /* Invoke the filesystem-specific code */
6337         error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, ctx);
6338
6339         vnode_put(nd.ni_vp);
6340         nameidone(&nd);
6341
6342         /*
6343          * Copy any data to user, size was
6344          * already set and checked above.
6345          */
6346         if (error == 0 && (cmd & IOC_OUT) && size)
6347                 error = copyout(data, uap->data, size);
6348
6349 FSCtl_Exit:
6350         if (memp) kfree(memp, size);
6351
6352         return error;
6353 }
6354 /* end of fsctl system call */
6355
6356 /*
6357  * An in-kernel sync for power management to call.
6358  */
6359 __private_extern__ int
6360 sync_internal(void)
6361 {
6362         int error;
6363
6364         struct sync_args data;
6365
6366         int retval[2];
6367
6368
6369         error = sync(current_proc(), &data, &retval[0]);
6370
6371
6372         return (error);
6373 } /* end of sync_internal call */
6374
6375
6376 /*
6377  *  Retrieve the data of an extended attribute.
6378  */
6379 int
6380 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
6381 {
6382         vnode_t vp;
6383         struct nameidata nd;
6384         char attrname[XATTR_MAXNAMELEN+1];
6385         vfs_context_t ctx = vfs_context_current();
6386         uio_t auio = NULL;
6387         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6388         size_t attrsize = 0;
6389         size_t namelen;
6390         u_long nameiflags;
6391         int error;
6392         char uio_buf[ UIO_SIZEOF(1) ];
6393
6394         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6395                 return (EINVAL);
6396
6397         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6398         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6399         if ((error = namei(&nd))) {
6400                 return (error);
6401         }
6402         vp = nd.ni_vp;
6403         nameidone(&nd);
6404
6405         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6406                 goto out;
6407         }
6408         if (xattr_protected(attrname)) {
6409                 error = EPERM;
6410                 goto out;
6411         }
6412         if (uap->value && uap->size > 0) {
6413                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6414                                             &uio_buf[0], sizeof(uio_buf));
6415                 uio_addiov(auio, uap->value, uap->size);
6416         }
6417
6418         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
6419 out:
6420         vnode_put(vp);
6421
6422         if (auio) {
6423                 *retval = uap->size - uio_resid(auio);
6424         } else {
6425                 *retval = (user_ssize_t)attrsize;
6426         }
6427
6428         return (error);
6429 }
6430
6431 /*
6432  * Retrieve the data of an extended attribute.
6433  */
6434 int
6435 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
6436 {
6437         vnode_t vp;
6438         char attrname[XATTR_MAXNAMELEN+1];
6439         uio_t auio = NULL;
6440         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6441         size_t attrsize = 0;
6442         size_t namelen;
6443         int error;
6444         char uio_buf[ UIO_SIZEOF(1) ];
6445
6446         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6447                 return (EINVAL);
6448
6449         if ( (error = file_vnode(uap->fd, &vp)) ) {
6450                 return (error);
6451         }
6452         if ( (error = vnode_getwithref(vp)) ) {
6453                 file_drop(uap->fd);
6454                 return(error);
6455         }
6456         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6457                 goto out;
6458         }
6459         if (xattr_protected(attrname)) {
6460                 error = EPERM;
6461                 goto out;
6462         }
6463         if (uap->value && uap->size > 0) {
6464                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6465                                             &uio_buf[0], sizeof(uio_buf));
6466                 uio_addiov(auio, uap->value, uap->size);
6467         }
6468
6469         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
6470 out:
6471         (void)vnode_put(vp);
6472         file_drop(uap->fd);
6473
6474         if (auio) {
6475                 *retval = uap->size - uio_resid(auio);
6476         } else {
6477                 *retval = (user_ssize_t)attrsize;
6478         }
6479         return (error);
6480 }
6481
6482 /*
6483  * Set the data of an extended attribute.
6484  */
6485 int
6486 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
6487 {
6488         vnode_t vp;
6489         struct nameidata nd;
6490         char attrname[XATTR_MAXNAMELEN+1];
6491         vfs_context_t ctx = vfs_context_current();
6492         uio_t auio = NULL;
6493         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6494         size_t namelen;
6495         u_long nameiflags;
6496         int error;
6497         char uio_buf[ UIO_SIZEOF(1) ];
6498
6499         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6500                 return (EINVAL);
6501
6502         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6503                 return (error);
6504         }
6505         if (xattr_protected(attrname))
6506                 return(EPERM);
6507         if (uap->size != 0 && uap->value == 0) {
6508                 return (EINVAL);
6509         }
6510
6511         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6512         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6513         if ((error = namei(&nd))) {
6514                 return (error);
6515         }
6516         vp = nd.ni_vp;
6517         nameidone(&nd);
6518
6519         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6520                                     &uio_buf[0], sizeof(uio_buf));
6521         uio_addiov(auio, uap->value, uap->size);
6522
6523         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
6524 #if CONFIG_FSE
6525         if (error == 0) {
6526                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
6527                     FSE_ARG_VNODE, vp,
6528                     FSE_ARG_DONE);
6529         }
6530 #endif
6531         vnode_put(vp);
6532         *retval = 0;
6533         return (error);
6534 }
6535
6536 /*
6537  * Set the data of an extended attribute.
6538  */
6539 int
6540 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
6541 {
6542         vnode_t vp;
6543         char attrname[XATTR_MAXNAMELEN+1];
6544         uio_t auio = NULL;
6545         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6546         size_t namelen;
6547         int error;
6548         char uio_buf[ UIO_SIZEOF(1) ];
6549         vfs_context_t ctx = vfs_context_current();
6550
6551         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6552                 return (EINVAL);
6553
6554         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6555                 return (error);
6556         }
6557         if (xattr_protected(attrname))
6558                 return(EPERM);
6559         if (uap->size != 0 && uap->value == 0) {
6560                 return (EINVAL);
6561         }
6562         if ( (error = file_vnode(uap->fd, &vp)) ) {
6563                 return (error);
6564         }
6565         if ( (error = vnode_getwithref(vp)) ) {
6566                 file_drop(uap->fd);
6567                 return(error);
6568         }
6569         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6570                                     &uio_buf[0], sizeof(uio_buf));
6571         uio_addiov(auio, uap->value, uap->size);
6572
6573         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
6574 #if CONFIG_FSE
6575         if (error == 0) {
6576                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
6577                     FSE_ARG_VNODE, vp,
6578                     FSE_ARG_DONE);
6579         }
6580 #endif
6581         vnode_put(vp);
6582         file_drop(uap->fd);
6583         *retval = 0;
6584         return (error);
6585 }
6586
6587 /*
6588  * Remove an extended attribute.
6589  */
6590 #warning "code duplication"
6591 int
6592 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
6593 {
6594         vnode_t vp;
6595         struct nameidata nd;
6596         char attrname[XATTR_MAXNAMELEN+1];
6597         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6598         vfs_context_t ctx = vfs_context_current();
6599         size_t namelen;
6600         u_long nameiflags;
6601         int error;
6602
6603         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6604                 return (EINVAL);
6605
6606         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6607         if (error != 0) {
6608                 return (error);
6609         }
6610         if (xattr_protected(attrname))
6611                 return(EPERM);
6612         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6613         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6614         if ((error = namei(&nd))) {
6615                 return (error);
6616         }
6617         vp = nd.ni_vp;
6618         nameidone(&nd);
6619
6620         error = vn_removexattr(vp, attrname, uap->options, ctx);
6621 #if CONFIG_FSE
6622         if (error == 0) {
6623                 add_fsevent(FSE_XATTR_REMOVED, ctx,
6624                     FSE_ARG_VNODE, vp,
6625                     FSE_ARG_DONE);
6626         }
6627 #endif
6628         vnode_put(vp);
6629         *retval = 0;
6630         return (error);
6631 }
6632
6633 /*
6634  * Remove an extended attribute.
6635  */
6636 #warning "code duplication"
6637 int
6638 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
6639 {
6640         vnode_t vp;
6641         char attrname[XATTR_MAXNAMELEN+1];
6642         size_t namelen;
6643         int error;
6644         vfs_context_t ctx = vfs_context_current();
6645
6646         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6647                 return (EINVAL);
6648
6649         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6650         if (error != 0) {
6651                 return (error);
6652         }
6653         if (xattr_protected(attrname))
6654                 return(EPERM);
6655         if ( (error = file_vnode(uap->fd, &vp)) ) {
6656                 return (error);
6657         }
6658         if ( (error = vnode_getwithref(vp)) ) {
6659                 file_drop(uap->fd);
6660                 return(error);
6661         }
6662
6663         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
6664 #if CONFIG_FSE
6665         if (error == 0) {
6666                 add_fsevent(FSE_XATTR_REMOVED, ctx,
6667                     FSE_ARG_VNODE, vp,
6668                     FSE_ARG_DONE);
6669         }
6670 #endif
6671         vnode_put(vp);
6672         file_drop(uap->fd);
6673         *retval = 0;
6674         return (error);
6675 }
6676
6677 /*
6678  * Retrieve the list of extended attribute names.
6679  */
6680 #warning "code duplication"
6681 int
6682 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
6683 {
6684         vnode_t vp;
6685         struct nameidata nd;
6686         vfs_context_t ctx = vfs_context_current();
6687         uio_t auio = NULL;
6688         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6689         size_t attrsize = 0;
6690         u_long nameiflags;
6691         int error;
6692         char uio_buf[ UIO_SIZEOF(1) ];
6693
6694         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6695                 return (EINVAL);
6696
6697         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
6698         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6699         if ((error = namei(&nd))) {
6700                 return (error);
6701         }
6702         vp = nd.ni_vp;
6703         nameidone(&nd);
6704         if (uap->namebuf != 0 && uap->bufsize > 0) {
6705                 // LP64todo - fix this!
6706                 auio = uio_createwithbuffer(1, 0, spacetype,
6707                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
6708                 uio_addiov(auio, uap->namebuf, uap->bufsize);
6709         }
6710
6711         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
6712
6713         vnode_put(vp);
6714         if (auio) {
6715                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6716         } else {
6717                 *retval = (user_ssize_t)attrsize;
6718         }
6719         return (error);
6720 }
6721
6722 /*
6723  * Retrieve the list of extended attribute names.
6724  */
6725 #warning "code duplication"
6726 int
6727 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
6728 {
6729         vnode_t vp;
6730         uio_t auio = NULL;
6731         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6732         size_t attrsize = 0;
6733         int error;
6734         char uio_buf[ UIO_SIZEOF(1) ];
6735
6736         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6737                 return (EINVAL);
6738
6739         if ( (error = file_vnode(uap->fd, &vp)) ) {
6740                 return (error);
6741         }
6742         if ( (error = vnode_getwithref(vp)) ) {
6743                 file_drop(uap->fd);
6744                 return(error);
6745         }
6746         if (uap->namebuf != 0 && uap->bufsize > 0) {
6747                 // LP64todo - fix this!
6748                 auio = uio_createwithbuffer(1, 0, spacetype,
6749                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
6750                 uio_addiov(auio, uap->namebuf, uap->bufsize);
6751         }
6752
6753         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
6754
6755         vnode_put(vp);
6756         file_drop(uap->fd);
6757         if (auio) {
6758                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6759         } else {
6760                 *retval = (user_ssize_t)attrsize;
6761         }
6762         return (error);
6763 }
6764
6765 /*
6766  * Common routine to handle various flavors of statfs data heading out
6767  *      to user space.
6768  *
6769  * Returns:     0                       Success
6770  *              EFAULT
6771  */
6772 static int
6773 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
6774     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
6775     boolean_t partial_copy)
6776 {
6777         int             error;
6778         int             my_size, copy_size;
6779
6780         if (is_64_bit) {
6781                 struct user_statfs sfs;
6782                 my_size = copy_size = sizeof(sfs);
6783                 bzero(&sfs, my_size);
6784                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6785                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
6786                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6787                 sfs.f_bsize = (user_long_t)sfsp->f_bsize;
6788                 sfs.f_iosize = (user_long_t)sfsp->f_iosize;
6789                 sfs.f_blocks = (user_long_t)sfsp->f_blocks;
6790                 sfs.f_bfree = (user_long_t)sfsp->f_bfree;
6791                 sfs.f_bavail = (user_long_t)sfsp->f_bavail;
6792                 sfs.f_files = (user_long_t)sfsp->f_files;
6793                 sfs.f_ffree = (user_long_t)sfsp->f_ffree;
6794                 sfs.f_fsid = sfsp->f_fsid;
6795                 sfs.f_owner = sfsp->f_owner;
6796                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6797                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6798                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6799
6800                 if (partial_copy) {
6801                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6802                 }
6803                 error = copyout((caddr_t)&sfs, bufp, copy_size);
6804         }
6805         else {
6806                 struct statfs sfs;
6807                 my_size = copy_size = sizeof(sfs);
6808                 bzero(&sfs, my_size);
6809
6810                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6811                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
6812                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6813
6814                 /*
6815                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
6816                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
6817                  * to reflect the filesystem size as best we can.
6818                  */
6819                 if ((sfsp->f_blocks > LONG_MAX)
6820                         /* Hack for 4061702 . I think the real fix is for Carbon to
6821                          * look for some volume capability and not depend on hidden
6822                          * semantics agreed between a FS and carbon.
6823                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
6824                          * for Carbon to set bNoVolumeSizes volume attribute.
6825                          * Without this the webdavfs files cannot be copied onto
6826                          * disk as they look huge. This change should not affect
6827                          * XSAN as they should not setting these to -1..
6828                          */
6829                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
6830                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
6831                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
6832                         int             shift;
6833
6834                         /*
6835                          * Work out how far we have to shift the block count down to make it fit.
6836                          * Note that it's possible to have to shift so far that the resulting
6837                          * blocksize would be unreportably large.  At that point, we will clip
6838                          * any values that don't fit.
6839                          *
6840                          * For safety's sake, we also ensure that f_iosize is never reported as
6841                          * being smaller than f_bsize.
6842                          */
6843                         for (shift = 0; shift < 32; shift++) {
6844                                 if ((sfsp->f_blocks >> shift) <= LONG_MAX)
6845                                         break;
6846                                 if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX)
6847                                         break;
6848                         }
6849 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s)))
6850                         sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
6851                         sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
6852                         sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
6853 #undef __SHIFT_OR_CLIP
6854                         sfs.f_bsize = (long)(sfsp->f_bsize << shift);
6855                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
6856                 } else {
6857                         /* filesystem is small enough to be reported honestly */
6858                         sfs.f_bsize = (long)sfsp->f_bsize;
6859                         sfs.f_iosize = (long)sfsp->f_iosize;
6860                         sfs.f_blocks = (long)sfsp->f_blocks;
6861                         sfs.f_bfree = (long)sfsp->f_bfree;
6862                         sfs.f_bavail = (long)sfsp->f_bavail;
6863                 }
6864                 sfs.f_files = (long)sfsp->f_files;
6865                 sfs.f_ffree = (long)sfsp->f_ffree;
6866                 sfs.f_fsid = sfsp->f_fsid;
6867                 sfs.f_owner = sfsp->f_owner;
6868                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6869                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6870                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6871
6872                 if (partial_copy) {
6873                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6874                 }
6875                 error = copyout((caddr_t)&sfs, bufp, copy_size);
6876         }
6877
6878         if (sizep != NULL) {
6879                 *sizep = my_size;
6880         }
6881         return(error);
6882 }
6883
6884 /*
6885  * copy stat structure into user_stat structure.
6886  */
6887 void munge_stat(struct stat *sbp, struct user_stat *usbp)
6888 {
6889         bzero(usbp, sizeof(struct user_stat));
6890
6891         usbp->st_dev = sbp->st_dev;
6892         usbp->st_ino = sbp->st_ino;
6893         usbp->st_mode = sbp->st_mode;
6894         usbp->st_nlink = sbp->st_nlink;
6895         usbp->st_uid = sbp->st_uid;
6896         usbp->st_gid = sbp->st_gid;
6897         usbp->st_rdev = sbp->st_rdev;
6898 #ifndef _POSIX_C_SOURCE
6899         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
6900         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
6901         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
6902         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
6903         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
6904         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
6905 #else
6906         usbp->st_atime = sbp->st_atime;
6907         usbp->st_atimensec = sbp->st_atimensec;
6908         usbp->st_mtime = sbp->st_mtime;
6909         usbp->st_mtimensec = sbp->st_mtimensec;
6910         usbp->st_ctime = sbp->st_ctime;
6911         usbp->st_ctimensec = sbp->st_ctimensec;
6912 #endif
6913         usbp->st_size = sbp->st_size;
6914         usbp->st_blocks = sbp->st_blocks;
6915         usbp->st_blksize = sbp->st_blksize;
6916         usbp->st_flags = sbp->st_flags;
6917         usbp->st_gen = sbp->st_gen;
6918         usbp->st_lspare = sbp->st_lspare;
6919         usbp->st_qspare[0] = sbp->st_qspare[0];
6920         usbp->st_qspare[1] = sbp->st_qspare[1];
6921 }
6922
6923 /*
6924  * copy stat64 structure into user_stat64 structure.
6925  */
6926 void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp)
6927 {
6928         bzero(usbp, sizeof(struct user_stat));
6929
6930         usbp->st_dev = sbp->st_dev;
6931         usbp->st_ino = sbp->st_ino;
6932         usbp->st_mode = sbp->st_mode;
6933         usbp->st_nlink = sbp->st_nlink;
6934         usbp->st_uid = sbp->st_uid;
6935         usbp->st_gid = sbp->st_gid;
6936         usbp->st_rdev = sbp->st_rdev;
6937 #ifndef _POSIX_C_SOURCE
6938         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
6939         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
6940         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
6941         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
6942         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
6943         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
6944         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
6945         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
6946 #else
6947         usbp->st_atime = sbp->st_atime;
6948         usbp->st_atimensec = sbp->st_atimensec;
6949         usbp->st_mtime = sbp->st_mtime;
6950         usbp->st_mtimensec = sbp->st_mtimensec;
6951         usbp->st_ctime = sbp->st_ctime;
6952         usbp->st_ctimensec = sbp->st_ctimensec;
6953         usbp->st_birthtime = sbp->st_birthtime;
6954         usbp->st_birthtimensec = sbp->st_birthtimensec;
6955 #endif
6956         usbp->st_size = sbp->st_size;
6957         usbp->st_blocks = sbp->st_blocks;
6958         usbp->st_blksize = sbp->st_blksize;
6959         usbp->st_flags = sbp->st_flags;
6960         usbp->st_gen = sbp->st_gen;
6961         usbp->st_lspare = sbp->st_lspare;
6962         usbp->st_qspare[0] = sbp->st_qspare[0];
6963         usbp->st_qspare[1] = sbp->st_qspare[1];
6964 }